From 041839ceabbc67165512fde0d33c91347b758487 Mon Sep 17 00:00:00 2001
From: Jakub Kotur <qtr@google.com>
Date: Mon, 21 Dec 2020 17:28:15 +0100
Subject: Initial import of rayon-1.5.0.

Bug: 155309706
Change-Id: I6ff7de1cb89d093d7938abf78d586ed76da85b0d
---
 .cargo_vcs_info.json                              |    5 +
 .gitignore                                        |    6 +
 Cargo.toml                                        |   51 +
 Cargo.toml.orig                                   |   41 +
 FAQ.md                                            |  227 ++
 README.md                                         |  124 +
 RELEASES.md                                       |  732 +++++
 build.rs                                          |    6 +
 examples/README.md                                |    3 +
 examples/cpu_monitor.rs                           |   81 +
 src/collections/binary_heap.rs                    |  120 +
 src/collections/btree_map.rs                      |   66 +
 src/collections/btree_set.rs                      |   52 +
 src/collections/hash_map.rs                       |   96 +
 src/collections/hash_set.rs                       |   80 +
 src/collections/linked_list.rs                    |   66 +
 src/collections/mod.rs                            |   84 +
 src/collections/vec_deque.rs                      |  159 ++
 src/compile_fail/cannot_collect_filtermap_data.rs |   16 +
 src/compile_fail/cannot_zip_filtered_data.rs      |   16 +
 src/compile_fail/cell_par_iter.rs                 |   15 +
 src/compile_fail/mod.rs                           |    7 +
 src/compile_fail/must_use.rs                      |   67 +
 src/compile_fail/no_send_par_iter.rs              |   64 +
 src/compile_fail/rc_par_iter.rs                   |   17 +
 src/delegate.rs                                   |   70 +
 src/iter/chain.rs                                 |  268 ++
 src/iter/chunks.rs                                |  216 ++
 src/iter/cloned.rs                                |  223 ++
 src/iter/collect/consumer.rs                      |  159 ++
 src/iter/collect/mod.rs                           |  171 ++
 src/iter/collect/test.rs                          |  385 +++
 src/iter/copied.rs                                |  223 ++
 src/iter/empty.rs                                 |  104 +
 src/iter/enumerate.rs                             |  133 +
 src/iter/extend.rs                                |  376 +++
 src/iter/filter.rs                                |  141 +
 src/iter/filter_map.rs                            |  142 +
 src/iter/find.rs                                  |  120 +
 src/iter/find_first_last/mod.rs                   |  238 ++
 src/iter/find_first_last/test.rs                  |  106 +
 src/iter/flat_map.rs                              |  154 +
 src/iter/flat_map_iter.rs                         |  147 +
 src/iter/flatten.rs                               |  140 +
 src/iter/flatten_iter.rs                          |  132 +
 src/iter/fold.rs                                  |  302 ++
 src/iter/for_each.rs                              |   77 +
 src/iter/from_par_iter.rs                         |  228 ++
 src/iter/inspect.rs                               |  257 ++
 src/iter/interleave.rs                            |  336 +++
 src/iter/interleave_shortest.rs                   |   85 +
 src/iter/intersperse.rs                           |  410 +++
 src/iter/len.rs                                   |  271 ++
 src/iter/map.rs                                   |  259 ++
 src/iter/map_with.rs                              |  573 ++++
 src/iter/mod.rs                                   | 3121 +++++++++++++++++++++
 src/iter/multizip.rs                              |  338 +++
 src/iter/noop.rs                                  |   59 +
 src/iter/once.rs                                  |   68 +
 src/iter/panic_fuse.rs                            |  342 +++
 src/iter/par_bridge.rs                            |  201 ++
 src/iter/plumbing/README.md                       |  315 +++
 src/iter/plumbing/mod.rs                          |  484 ++++
 src/iter/positions.rs                             |  137 +
 src/iter/product.rs                               |  114 +
 src/iter/reduce.rs                                |  116 +
 src/iter/repeat.rs                                |  241 ++
 src/iter/rev.rs                                   |  123 +
 src/iter/skip.rs                                  |   88 +
 src/iter/splitter.rs                              |  174 ++
 src/iter/step_by.rs                               |  144 +
 src/iter/sum.rs                                   |  110 +
 src/iter/take.rs                                  |   86 +
 src/iter/test.rs                                  | 2188 +++++++++++++++
 src/iter/try_fold.rs                              |  294 ++
 src/iter/try_reduce.rs                            |  129 +
 src/iter/try_reduce_with.rs                       |  134 +
 src/iter/unzip.rs                                 |  464 +++
 src/iter/update.rs                                |  327 +++
 src/iter/while_some.rs                            |  154 +
 src/iter/zip.rs                                   |  159 ++
 src/iter/zip_eq.rs                                |   72 +
 src/lib.rs                                        |  120 +
 src/math.rs                                       |   54 +
 src/option.rs                                     |  203 ++
 src/par_either.rs                                 |   74 +
 src/prelude.rs                                    |   17 +
 src/private.rs                                    |   26 +
 src/range.rs                                      |  368 +++
 src/range_inclusive.rs                            |  288 ++
 src/result.rs                                     |  132 +
 src/slice/mergesort.rs                            |  763 +++++
 src/slice/mod.rs                                  | 1203 ++++++++
 src/slice/quicksort.rs                            |  800 ++++++
 src/slice/test.rs                                 |  148 +
 src/split_producer.rs                             |  132 +
 src/str.rs                                        |  874 ++++++
 src/string.rs                                     |   48 +
 src/vec.rs                                        |  245 ++
 tests/chars.rs                                    |   39 +
 tests/clones.rs                                   |  186 ++
 tests/collect.rs                                  |  111 +
 tests/cross-pool.rs                               |   21 +
 tests/debug.rs                                    |  209 ++
 tests/intersperse.rs                              |   60 +
 tests/issue671-unzip.rs                           |   17 +
 tests/issue671.rs                                 |   16 +
 tests/iter_panic.rs                               |   52 +
 tests/named-threads.rs                            |   24 +
 tests/octillion.rs                                |  130 +
 tests/producer_split_at.rs                        |  344 +++
 tests/sort-panic-safe.rs                          |  162 ++
 tests/str.rs                                      |  116 +
 113 files changed, 25811 insertions(+)
 create mode 100644 .cargo_vcs_info.json
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 Cargo.toml.orig
 create mode 100644 FAQ.md
 create mode 100644 README.md
 create mode 100644 RELEASES.md
 create mode 100644 build.rs
 create mode 100644 examples/README.md
 create mode 100644 examples/cpu_monitor.rs
 create mode 100644 src/collections/binary_heap.rs
 create mode 100644 src/collections/btree_map.rs
 create mode 100644 src/collections/btree_set.rs
 create mode 100644 src/collections/hash_map.rs
 create mode 100644 src/collections/hash_set.rs
 create mode 100644 src/collections/linked_list.rs
 create mode 100644 src/collections/mod.rs
 create mode 100644 src/collections/vec_deque.rs
 create mode 100644 src/compile_fail/cannot_collect_filtermap_data.rs
 create mode 100644 src/compile_fail/cannot_zip_filtered_data.rs
 create mode 100644 src/compile_fail/cell_par_iter.rs
 create mode 100644 src/compile_fail/mod.rs
 create mode 100644 src/compile_fail/must_use.rs
 create mode 100644 src/compile_fail/no_send_par_iter.rs
 create mode 100644 src/compile_fail/rc_par_iter.rs
 create mode 100644 src/delegate.rs
 create mode 100644 src/iter/chain.rs
 create mode 100644 src/iter/chunks.rs
 create mode 100644 src/iter/cloned.rs
 create mode 100644 src/iter/collect/consumer.rs
 create mode 100644 src/iter/collect/mod.rs
 create mode 100644 src/iter/collect/test.rs
 create mode 100644 src/iter/copied.rs
 create mode 100644 src/iter/empty.rs
 create mode 100644 src/iter/enumerate.rs
 create mode 100644 src/iter/extend.rs
 create mode 100644 src/iter/filter.rs
 create mode 100644 src/iter/filter_map.rs
 create mode 100644 src/iter/find.rs
 create mode 100644 src/iter/find_first_last/mod.rs
 create mode 100644 src/iter/find_first_last/test.rs
 create mode 100644 src/iter/flat_map.rs
 create mode 100644 src/iter/flat_map_iter.rs
 create mode 100644 src/iter/flatten.rs
 create mode 100644 src/iter/flatten_iter.rs
 create mode 100644 src/iter/fold.rs
 create mode 100644 src/iter/for_each.rs
 create mode 100644 src/iter/from_par_iter.rs
 create mode 100644 src/iter/inspect.rs
 create mode 100644 src/iter/interleave.rs
 create mode 100644 src/iter/interleave_shortest.rs
 create mode 100644 src/iter/intersperse.rs
 create mode 100644 src/iter/len.rs
 create mode 100644 src/iter/map.rs
 create mode 100644 src/iter/map_with.rs
 create mode 100644 src/iter/mod.rs
 create mode 100644 src/iter/multizip.rs
 create mode 100644 src/iter/noop.rs
 create mode 100644 src/iter/once.rs
 create mode 100644 src/iter/panic_fuse.rs
 create mode 100644 src/iter/par_bridge.rs
 create mode 100644 src/iter/plumbing/README.md
 create mode 100644 src/iter/plumbing/mod.rs
 create mode 100644 src/iter/positions.rs
 create mode 100644 src/iter/product.rs
 create mode 100644 src/iter/reduce.rs
 create mode 100644 src/iter/repeat.rs
 create mode 100644 src/iter/rev.rs
 create mode 100644 src/iter/skip.rs
 create mode 100644 src/iter/splitter.rs
 create mode 100644 src/iter/step_by.rs
 create mode 100644 src/iter/sum.rs
 create mode 100644 src/iter/take.rs
 create mode 100644 src/iter/test.rs
 create mode 100644 src/iter/try_fold.rs
 create mode 100644 src/iter/try_reduce.rs
 create mode 100644 src/iter/try_reduce_with.rs
 create mode 100644 src/iter/unzip.rs
 create mode 100644 src/iter/update.rs
 create mode 100644 src/iter/while_some.rs
 create mode 100644 src/iter/zip.rs
 create mode 100644 src/iter/zip_eq.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/math.rs
 create mode 100644 src/option.rs
 create mode 100644 src/par_either.rs
 create mode 100644 src/prelude.rs
 create mode 100644 src/private.rs
 create mode 100644 src/range.rs
 create mode 100644 src/range_inclusive.rs
 create mode 100644 src/result.rs
 create mode 100644 src/slice/mergesort.rs
 create mode 100644 src/slice/mod.rs
 create mode 100644 src/slice/quicksort.rs
 create mode 100644 src/slice/test.rs
 create mode 100644 src/split_producer.rs
 create mode 100644 src/str.rs
 create mode 100644 src/string.rs
 create mode 100644 src/vec.rs
 create mode 100644 tests/chars.rs
 create mode 100644 tests/clones.rs
 create mode 100644 tests/collect.rs
 create mode 100644 tests/cross-pool.rs
 create mode 100644 tests/debug.rs
 create mode 100644 tests/intersperse.rs
 create mode 100644 tests/issue671-unzip.rs
 create mode 100644 tests/issue671.rs
 create mode 100644 tests/iter_panic.rs
 create mode 100644 tests/named-threads.rs
 create mode 100644 tests/octillion.rs
 create mode 100644 tests/producer_split_at.rs
 create mode 100644 tests/sort-panic-safe.rs
 create mode 100644 tests/str.rs

diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..c28c857
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+  "git": {
+    "sha1": "dc13cb7875ad43c7d1ea8b1e504b09c031f7ed5a"
+  }
+}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..274bf18
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+Cargo.lock
+target
+*~
+TAGS
+*.bk
+.idea
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..2129b6f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,51 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "rayon"
+version = "1.5.0"
+authors = ["Niko Matsakis <niko@alum.mit.edu>", "Josh Stone <cuviper@gmail.com>"]
+exclude = ["/ci/*", "/scripts/*", "/.github/*", "/bors.toml"]
+description = "Simple work-stealing parallelism for Rust"
+documentation = "https://docs.rs/rayon/"
+readme = "README.md"
+keywords = ["parallel", "thread", "concurrency", "join", "performance"]
+categories = ["concurrency"]
+license = "Apache-2.0/MIT"
+repository = "https://github.com/rayon-rs/rayon"
+[dependencies.crossbeam-deque]
+version = "0.8.0"
+
+[dependencies.either]
+version = "1.0"
+default-features = false
+
+[dependencies.rayon-core]
+version = "1.9.0"
+[dev-dependencies.docopt]
+version = "1"
+
+[dev-dependencies.lazy_static]
+version = "1"
+
+[dev-dependencies.rand]
+version = "0.7"
+
+[dev-dependencies.rand_xorshift]
+version = "0.2"
+
+[dev-dependencies.serde]
+version = "1.0.85"
+features = ["derive"]
+[build-dependencies.autocfg]
+version = "1"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..a7600e1
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,41 @@
+[package]
+name = "rayon"
+# Reminder to update html_rool_url in lib.rs when updating version
+version = "1.5.0"
+authors = ["Niko Matsakis <niko@alum.mit.edu>",
+           "Josh Stone <cuviper@gmail.com>"]
+description = "Simple work-stealing parallelism for Rust"
+edition = "2018"
+license = "Apache-2.0/MIT"
+repository = "https://github.com/rayon-rs/rayon"
+documentation = "https://docs.rs/rayon/"
+readme = "README.md"
+keywords = ["parallel", "thread", "concurrency", "join", "performance"]
+categories = ["concurrency"]
+exclude = ["/ci/*", "/scripts/*", "/.github/*", "/bors.toml"]
+
+[workspace]
+members = ["rayon-demo", "rayon-core"]
+exclude = ["ci"]
+
+[dependencies]
+rayon-core = { version = "1.9.0", path = "rayon-core" }
+crossbeam-deque = "0.8.0"
+
+# This is a public dependency!
+[dependencies.either]
+version = "1.0"
+default-features = false
+
+[dev-dependencies]
+docopt = "1"
+lazy_static = "1"
+rand = "0.7"
+rand_xorshift = "0.2"
+
+[dev-dependencies.serde]
+version = "1.0.85"
+features = ["derive"]
+
+[build-dependencies]
+autocfg = "1"
diff --git a/FAQ.md b/FAQ.md
new file mode 100644
index 0000000..11117d3
--- /dev/null
+++ b/FAQ.md
@@ -0,0 +1,227 @@
+# Rayon FAQ
+
+This file is for general questions that don't fit into the README or
+crate docs.
+
+## How many threads will Rayon spawn?
+
+By default, Rayon uses the same number of threads as the number of
+CPUs available. Note that on systems with hyperthreading enabled this
+equals the number of logical cores and not the physical ones.
+
+If you want to alter the number of threads spawned, you can set the
+environmental variable `RAYON_NUM_THREADS` to the desired number of
+threads or use the
+[`ThreadPoolBuilder::build_global` function](https://docs.rs/rayon/*/rayon/struct.ThreadPoolBuilder.html#method.build_global)
+method.
+
+## How does Rayon balance work between threads?
+
+Behind the scenes, Rayon uses a technique called **work stealing** to
+try and dynamically ascertain how much parallelism is available and
+exploit it. The idea is very simple: we always have a pool of worker
+threads available, waiting for some work to do. When you call `join`
+the first time, we shift over into that pool of threads. But if you
+call `join(a, b)` from a worker thread W, then W will place `b` into
+its work queue, advertising that this is work that other worker
+threads might help out with. W will then start executing `a`.
+
+While W is busy with `a`, other threads might come along and take `b`
+from its queue. That is called *stealing* `b`. Once `a` is done, W
+checks whether `b` was stolen by another thread and, if not, executes
+`b` itself. If W runs out of jobs in its own queue, it will look
+through the other threads' queues and try to steal work from them.
+
+This technique is not new. It was first introduced by the
+[Cilk project][cilk], done at MIT in the late nineties. The name Rayon
+is an homage to that work.
+
+[cilk]: http://supertech.csail.mit.edu/cilk/
+
+## What should I do if I use `Rc`, `Cell`, `RefCell` or other non-Send-and-Sync types?
+
+There are a number of non-threadsafe types in the Rust standard library,
+and if your code is using them, you will not be able to combine it
+with Rayon. Similarly, even if you don't have such types, but you try
+to have multiple closures mutating the same state, you will get
+compilation errors; for example, this function won't work, because
+both closures access `slice`:
+
+```rust
+/// Increment all values in slice.
+fn increment_all(slice: &mut [i32]) {
+    rayon::join(|| process(slice), || process(slice));
+}
+```
+
+The correct way to resolve such errors will depend on the case.  Some
+cases are easy: for example, uses of [`Rc`] can typically be replaced
+with [`Arc`], which is basically equivalent, but thread-safe.
+
+Code that uses `Cell` or `RefCell`, however, can be somewhat more complicated.
+If you can refactor your code to avoid those types, that is often the best way
+forward, but otherwise, you can try to replace those types with their threadsafe
+equivalents:
+
+- `Cell` -- replacement: `AtomicUsize`, `AtomicBool`, etc
+- `RefCell` -- replacement: `RwLock`, or perhaps `Mutex`
+
+However, you have to be wary! The parallel versions of these types
+have different atomicity guarantees. For example, with a `Cell`, you
+can increment a counter like so:
+
+```rust
+let value = counter.get();
+counter.set(value + 1);
+```
+
+But when you use the equivalent `AtomicUsize` methods, you are
+actually introducing a potential race condition (not a data race,
+technically, but it can be an awfully fine distinction):
+
+```rust
+let value = tscounter.load(Ordering::SeqCst);
+tscounter.store(value + 1, Ordering::SeqCst);
+```
+
+You can already see that the `AtomicUsize` API is a bit more complex,
+as it requires you to specify an
+[ordering](http://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html). (I
+won't go into the details on ordering here, but suffice to say that if
+you don't know what an ordering is, and probably even if you do, you
+should use `Ordering::SeqCst`.) The danger in this parallel version of
+the counter is that other threads might be running at the same time
+and they could cause our counter to get out of sync. For example, if
+we have two threads, then they might both execute the "load" before
+either has a chance to execute the "store":
+
+```
+Thread 1                                          Thread 2
+let value = tscounter.load(Ordering::SeqCst);
+// value = X                                      let value = tscounter.load(Ordering::SeqCst);
+                                                  // value = X
+tscounter.store(value+1);                         tscounter.store(value+1);
+// tscounter = X+1                                // tscounter = X+1
+```
+
+Now even though we've had two increments, we'll only increase the
+counter by one!  Even though we've got no data race, this is still
+probably not the result we wanted. The problem here is that the `Cell`
+API doesn't make clear the scope of a "transaction" -- that is, the
+set of reads/writes that should occur atomically. In this case, we
+probably wanted the get/set to occur together.
+
+In fact, when using the `Atomic` types, you very rarely want a plain
+`load` or plain `store`. You probably want the more complex
+operations. A counter, for example, would use `fetch_add` to
+atomically load and increment the value in one step. Compare-and-swap
+is another popular building block.
+
+A similar problem can arise when converting `RefCell` to `RwLock`, but
+it is somewhat less likely, because the `RefCell` API does in fact
+have a notion of a transaction: the scope of the handle returned by
+`borrow` or `borrow_mut`. So if you convert each call to `borrow` to
+`read` (and `borrow_mut` to `write`), things will mostly work fine in
+a parallel setting, but there can still be changes in behavior.
+Consider using a `handle: RefCell<Vec<i32>>` like:
+
+```rust
+let len = handle.borrow().len();
+for i in 0 .. len {
+    let data = handle.borrow()[i];
+    println!("{}", data);
+}
+```
+
+In sequential code, we know that this loop is safe. But if we convert
+this to parallel code with an `RwLock`, we do not: this is because
+another thread could come along and do
+`handle.write().unwrap().pop()`, and thus change the length of the
+vector. In fact, even in *sequential* code, using very small borrow
+sections like this is an anti-pattern: you ought to be enclosing the
+entire transaction together, like so:
+
+```rust
+let vec = handle.borrow();
+let len = vec.len();
+for i in 0 .. len {
+    let data = vec[i];
+    println!("{}", data);
+}
+```
+
+Or, even better, using an iterator instead of indexing:
+
+```rust
+let vec = handle.borrow();
+for data in vec {
+    println!("{}", data);
+}
+```
+
+There are several reasons to prefer one borrow over many. The most
+obvious is that it is more efficient, since each borrow has to perform
+some safety checks. But it's also more reliable: suppose we modified
+the loop above to not just print things out, but also call into a
+helper function:
+
+```rust
+let vec = handle.borrow();
+for data in vec {
+    helper(...);
+}
+```
+
+And now suppose, independently, this helper fn evolved and had to pop
+something off of the vector:
+
+```rust
+fn helper(...) {
+    handle.borrow_mut().pop();
+}
+```
+
+Under the old model, where we did lots of small borrows, this would
+yield precisely the same error that we saw in parallel land using an
+`RwLock`: the length would be out of sync and our indexing would fail
+(note that in neither case would there be an actual *data race* and
+hence there would never be undefined behavior). But now that we use a
+single borrow, we'll see a borrow error instead, which is much easier
+to diagnose, since it occurs at the point of the `borrow_mut`, rather
+than downstream. Similarly, if we move to an `RwLock`, we'll find that
+the code either deadlocks (if the write is on the same thread as the
+read) or, if the write is on another thread, works just fine. Both of
+these are preferable to random failures in my experience.
+
+## But wait, isn't Rust supposed to free me from this kind of thinking?
+
+You might think that Rust is supposed to mean that you don't have to
+think about atomicity at all. In fact, if you avoid interior
+mutability (`Cell` and `RefCell` in a sequential setting, or
+`AtomicUsize`, `RwLock`, `Mutex`, et al. in parallel code), then this
+is true: the type system will basically guarantee that you don't have
+to think about atomicity at all. But often there are times when you
+WANT threads to interleave in the ways I showed above.
+
+Consider for example when you are conducting a search in parallel, say
+to find the shortest route. To avoid fruitless search, you might want
+to keep a cell with the shortest route you've found thus far.  This
+way, when you are searching down some path that's already longer than
+this shortest route, you can just stop and avoid wasted effort. In
+sequential land, you might model this "best result" as a shared value
+like `Rc<Cell<usize>>` (here the `usize` represents the length of best
+path found so far); in parallel land, you'd use a `Arc<AtomicUsize>`.
+Now we can make our search function look like:
+
+```rust
+fn search(path: &Path, cost_so_far: usize, best_cost: &Arc<AtomicUsize>) {
+    if cost_so_far >= best_cost.load(Ordering::SeqCst) {
+        return;
+    }
+    ...
+    best_cost.store(...);
+}
+```
+
+Now in this case, we really WANT to see results from other threads
+interjected into our execution!
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..869c537
--- /dev/null
+++ b/README.md
@@ -0,0 +1,124 @@
+# Rayon
+
+[![Rayon crate](https://img.shields.io/crates/v/rayon.svg)](https://crates.io/crates/rayon)
+[![Rayon documentation](https://docs.rs/rayon/badge.svg)](https://docs.rs/rayon)
+![minimum rustc 1.36](https://img.shields.io/badge/rustc-1.36+-red.svg)
+![build status](https://github.com/rayon-rs/rayon/workflows/master/badge.svg)
+[![Join the chat at https://gitter.im/rayon-rs/Lobby](https://badges.gitter.im/rayon-rs/Lobby.svg)](https://gitter.im/rayon-rs/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
+Rayon is a data-parallelism library for Rust. It is extremely
+lightweight and makes it easy to convert a sequential computation into
+a parallel one. It also guarantees data-race freedom. (You may also
+enjoy [this blog post][blog] about Rayon, which gives more background
+and details about how it works, or [this video][video], from the Rust
+Belt Rust conference.) Rayon is
+[available on crates.io](https://crates.io/crates/rayon), and
+[API Documentation is available on docs.rs](https://docs.rs/rayon/).
+
+[blog]: http://smallcultfollowing.com/babysteps/blog/2015/12/18/rayon-data-parallelism-in-rust/
+[video]: https://www.youtube.com/watch?v=gof_OEv71Aw
+
+## Parallel iterators and more
+
+Rayon makes it drop-dead simple to convert sequential iterators into
+parallel ones: usually, you just change your `foo.iter()` call into
+`foo.par_iter()`, and Rayon does the rest:
+
+```rust
+use rayon::prelude::*;
+fn sum_of_squares(input: &[i32]) -> i32 {
+    input.par_iter() // <-- just change that!
+         .map(|&i| i * i)
+         .sum()
+}
+```
+
+[Parallel iterators] take care of deciding how to divide your data
+into tasks; it will dynamically adapt for maximum performance. If you
+need more flexibility than that, Rayon also offers the [join] and
+[scope] functions, which let you create parallel tasks on your own.
+For even more control, you can create [custom threadpools] rather than
+using Rayon's default, global threadpool.
+
+[Parallel iterators]: https://docs.rs/rayon/*/rayon/iter/index.html
+[join]: https://docs.rs/rayon/*/rayon/fn.join.html
+[scope]: https://docs.rs/rayon/*/rayon/fn.scope.html
+[custom threadpools]: https://docs.rs/rayon/*/rayon/struct.ThreadPool.html
+
+## No data races
+
+You may have heard that parallel execution can produce all kinds of
+crazy bugs. Well, rest easy. Rayon's APIs all guarantee **data-race
+freedom**, which generally rules out most parallel bugs (though not
+all). In other words, **if your code compiles**, it typically does the
+same thing it did before.
+
+For the most, parallel iterators in particular are guaranteed to
+produce the same results as their sequential counterparts. One caveat:
+If your iterator has side effects (for example, sending methods to
+other threads through a [Rust channel] or writing to disk), those side
+effects may occur in a different order. Note also that, in some cases,
+parallel iterators offer alternative versions of the sequential
+iterator methods that can have higher performance.
+
+[Rust channel]: https://doc.rust-lang.org/std/sync/mpsc/fn.channel.html
+
+## Using Rayon
+
+[Rayon is available on crates.io](https://crates.io/crates/rayon). The
+recommended way to use it is to add a line into your Cargo.toml such
+as:
+
+```toml
+[dependencies]
+rayon = "1.5"
+```
+
+To use the Parallel Iterator APIs, a number of traits have to be in
+scope. The easiest way to bring those things into scope is to use the
+[Rayon prelude](https://docs.rs/rayon/*/rayon/prelude/index.html).  In
+each module where you would like to use the parallel iterator APIs,
+just add:
+
+```rust
+use rayon::prelude::*;
+```
+
+Rayon currently requires `rustc 1.36.0` or greater.
+
+## Contribution
+
+Rayon is an open source project! If you'd like to contribute to Rayon, check out [the list of "help wanted" issues](https://github.com/rayon-rs/rayon/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). These are all (or should be) issues that are suitable for getting started, and they generally include a detailed set of instructions for what to do. Please ask questions if anything is unclear! Also, check out the [Guide to Development](https://github.com/rayon-rs/rayon/wiki/Guide-to-Development) page on the wiki. Note that all code submitted in PRs to Rayon is assumed to [be licensed under Rayon's dual MIT/Apache2 licensing](https://github.com/rayon-rs/rayon/blob/master/README.md#license).
+
+## Quick demo
+
+To see Rayon in action, check out the `rayon-demo` directory, which
+includes a number of demos of code using Rayon. For example, run this
+command to get a visualization of an nbody simulation. To see the
+effect of using Rayon, press `s` to run sequentially and `p` to run in
+parallel.
+
+```text
+> cd rayon-demo
+> cargo run --release -- nbody visualize
+```
+
+For more information on demos, try:
+
+```text
+> cd rayon-demo
+> cargo run --release -- --help
+```
+
+## Other questions?
+
+See [the Rayon FAQ][faq].
+
+[faq]: https://github.com/rayon-rs/rayon/blob/master/FAQ.md
+
+## License
+
+Rayon is distributed under the terms of both the MIT license and the
+Apache License (Version 2.0). See [LICENSE-APACHE](LICENSE-APACHE) and
+[LICENSE-MIT](LICENSE-MIT) for details. Opening a pull requests is
+assumed to signal agreement with these licensing terms.
diff --git a/RELEASES.md b/RELEASES.md
new file mode 100644
index 0000000..fa452c3
--- /dev/null
+++ b/RELEASES.md
@@ -0,0 +1,732 @@
+# Release rayon 1.5.0 / rayon-core 1.9.0 (2020-10-21)
+
+- Update crossbeam dependencies.
+- The minimum supported `rustc` is now 1.36.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @mbrubeck
+- @mrksu
+
+# Release rayon 1.4.1 (2020-09-29)
+
+- The new `flat_map_iter` and `flatten_iter` methods can be used to flatten
+  sequential iterators, which may perform better in cases that don't need the
+  nested parallelism of `flat_map` and `flatten`.
+- The new `par_drain` method is a parallel version of the standard `drain` for
+  collections, removing items while keeping the original capacity. Collections
+  that implement this through `ParallelDrainRange` support draining items from
+  arbitrary index ranges, while `ParallelDrainFull` always drains everything.
+- The new `positions` method finds all items that match the given predicate and
+  returns their indices in a new iterator.
+
+# Release rayon-core 1.8.1 (2020-09-17)
+
+- Fixed an overflow panic on high-contention workloads, for a counter that was
+  meant to simply wrap. This panic only occurred with debug assertions enabled,
+  and was much more likely on 32-bit targets.
+
+# Release rayon 1.4.0 / rayon-core 1.8.0 (2020-08-24)
+
+- Implemented a new thread scheduler, [RFC 5], which uses targeted wakeups for
+  new work and for notifications of completed stolen work, reducing wasteful
+  CPU usage in idle threads.
+- Implemented `IntoParallelIterator for Range<char>` and `RangeInclusive<char>`
+  with the same iteration semantics as Rust 1.45.
+- Relaxed the lifetime requirements of the initial `scope` closure.
+
+[RFC 5]: https://github.com/rayon-rs/rfcs/pull/5
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @CAD97
+- @cuviper
+- @kmaork
+- @nikomatsakis
+- @SuperFluffy
+
+
+# Release rayon 1.3.1 / rayon-core 1.7.1 (2020-06-15)
+
+- Fixed a use-after-free race in calls blocked between two rayon thread pools.
+- Collecting to an indexed `Vec` now drops any partial writes while unwinding,
+  rather than just leaking them. If dropping also panics, Rust will abort.
+  - Note: the old leaking behavior is considered _safe_, just not ideal.
+- The new `IndexedParallelIterator::step_by()` adapts an iterator to step
+  through items by the given count, like `Iterator::step_by()`.
+- The new `ParallelSlice::par_chunks_exact()` and mutable equivalent
+  `ParallelSliceMut::par_chunks_exact_mut()` ensure that the chunks always have
+  the exact length requested, leaving any remainder separate, like the slice
+  methods `chunks_exact()` and `chunks_exact_mut()`.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @adrian5
+- @bluss
+- @cuviper
+- @FlyingCanoe
+- @GuillaumeGomez
+- @matthiasbeyer
+- @picoHz
+- @zesterer
+
+
+# Release rayon 1.3.0 / rayon-core 1.7.0 (2019-12-21)
+
+- Tuples up to length 12 now implement `IntoParallelIterator`, creating a
+  `MultiZip` iterator that produces items as similarly-shaped tuples.
+- The `--cfg=rayon_unstable` supporting code for `rayon-futures` is removed.
+- The minimum supported `rustc` is now 1.31.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @c410-f3r
+- @silwol
+
+
+# Release rayon-futures 0.1.1 (2019-12-21)
+
+- `Send` bounds have been added for the `Item` and `Error` associated types on
+  all generic `F: Future` interfaces. While technically a breaking change, this
+  is a soundness fix, so we are not increasing the semantic version for this.
+- This crate is now deprecated, and the `--cfg=rayon_unstable` supporting code
+  will be removed in `rayon-core 1.7.0`. This only supported the now-obsolete
+  `Future` from `futures 0.1`, while support for `std::future::Future` is
+  expected to come directly in `rayon-core` -- although that is not ready yet.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @kornelski
+- @jClaireCodesStuff
+- @jwass
+- @seanchen1991
+
+
+# Release rayon 1.2.1 / rayon-core 1.6.1 (2019-11-20)
+
+- Update crossbeam dependencies.
+- Add top-level doc links for the iterator traits.
+- Document that the iterator traits are not object safe.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @dnaka91
+- @matklad
+- @nikomatsakis
+- @Qqwy
+- @vorner
+
+
+# Release rayon 1.2.0 / rayon-core 1.6.0 (2019-08-30)
+
+- The new `ParallelIterator::copied()` converts an iterator of references into
+  copied values, like `Iterator::copied()`.
+- `ParallelExtend` is now implemented for the unit `()`.
+- Internal updates were made to improve test determinism, reduce closure type
+  sizes, reduce task allocations, and update dependencies.
+- The minimum supported `rustc` is now 1.28.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @Aaron1011
+- @cuviper
+- @ralfbiedert
+
+
+# Release rayon 1.1.0 / rayon-core 1.5.0 (2019-06-12)
+
+- FIFO spawns are now supported using the new `spawn_fifo()` and `scope_fifo()`
+  global functions, and their corresponding `ThreadPool` methods.
+  - Normally when tasks are queued on a thread, the most recent is processed
+    first (LIFO) while other threads will steal the oldest (FIFO). With FIFO
+    spawns, those tasks are processed locally in FIFO order too.
+  - Regular spawns and other tasks like `join` are not affected.
+  - The `breadth_first` configuration flag, which globally approximated this
+    effect, is now deprecated.
+  - For more design details, please see [RFC 1].
+- `ThreadPoolBuilder` can now take a custom `spawn_handler` to control how
+  threads will be created in the pool.
+  - `ThreadPoolBuilder::build_scoped()` uses this to create a scoped thread
+    pool, where the threads are able to use non-static data.
+  - This may also be used to support threading in exotic environments, like
+    WebAssembly, which don't support the normal `std::thread`.
+- `ParallelIterator` has 3 new methods: `find_map_any()`, `find_map_first()`,
+  and `find_map_last()`, like `Iterator::find_map()` with ordering constraints.
+- The new `ParallelIterator::panic_fuse()` makes a parallel iterator halt as soon
+  as possible if any of its threads panic. Otherwise, the panic state is not
+  usually noticed until the iterator joins its parallel tasks back together.
+- `IntoParallelIterator` is now implemented for integral `RangeInclusive`.
+- Several internal `Folder`s now have optimized `consume_iter` implementations.
+- `rayon_core::current_thread_index()` is now re-exported in `rayon`.
+- The minimum `rustc` is now 1.26, following the update policy defined in [RFC 3].
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @didroe
+- @GuillaumeGomez
+- @huonw
+- @janriemer
+- @kornelski
+- @nikomatsakis
+- @seanchen1991
+- @yegeun542
+
+[RFC 1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md
+[RFC 3]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0003-minimum-rustc.md
+
+
+# Release rayon 1.0.3 (2018-11-02)
+
+- `ParallelExtend` is now implemented for tuple pairs, enabling nested
+  `unzip()` and `partition_map()` operations.  For instance, `(A, (B, C))`
+  items can be unzipped into `(Vec<A>, (Vec<B>, Vec<C>))`.
+  - `ParallelExtend<(A, B)>` works like `unzip()`.
+  - `ParallelExtend<Either<A, B>>` works like `partition_map()`.
+- `ParallelIterator` now has a method `map_init()` which calls an `init`
+  function for a value to pair with items, like `map_with()` but dynamically
+  constructed.  That value type has no constraints, not even `Send` or `Sync`.
+  - The new `for_each_init()` is a variant of this for simple iteration.
+  - The new `try_for_each_init()` is a variant for fallible iteration.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @dan-zheng
+- @dholbert
+- @ignatenkobrain
+- @mdonoughe
+
+
+# Release rayon 1.0.2 / rayon-core 1.4.1 (2018-07-17)
+
+- The `ParallelBridge` trait with method `par_bridge()` makes it possible to
+  use any `Send`able `Iterator` in parallel!
+  - This trait has been added to `rayon::prelude`.
+  - It automatically implements internal synchronization and queueing to
+    spread the `Item`s across the thread pool.  Iteration order is not
+    preserved by this adaptor.
+  - "Native" Rayon iterators like `par_iter()` should still be preferred when
+    possible for better efficiency.
+- `ParallelString` now has additional methods for parity with `std` string
+  iterators: `par_char_indices()`, `par_bytes()`, `par_encode_utf16()`,
+  `par_matches()`, and `par_match_indices()`.
+- `ParallelIterator` now has fallible methods `try_fold()`, `try_reduce()`,
+  and `try_for_each`, plus `*_with()` variants of each, for automatically
+  short-circuiting iterators on `None` or `Err` values.  These are inspired by
+  `Iterator::try_fold()` and `try_for_each()` that were stabilized in Rust 1.27.
+- `Range<i128>` and `Range<u128>` are now supported with Rust 1.26 and later.
+- Small improvements have been made to the documentation.
+- `rayon-core` now only depends on `rand` for testing.
+- Rayon tests now work on stable Rust.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @AndyGauge
+- @cuviper
+- @ignatenkobrain
+- @LukasKalbertodt
+- @MajorBreakfast
+- @nikomatsakis
+- @paulkernfeld
+- @QuietMisdreavus
+
+
+# Release rayon 1.0.1 (2018-03-16)
+
+- Added more documentation for `rayon::iter::split()`.
+- Corrected links and typos in documentation.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @HadrienG2
+- @matthiasbeyer
+- @nikomatsakis
+
+
+# Release rayon 1.0.0 / rayon-core 1.4.0 (2018-02-15)
+
+- `ParallelIterator` added the `update` method which applies a function to
+  mutable references, inspired by `itertools`.
+- `IndexedParallelIterator` added the `chunks` method which yields vectors of
+  consecutive items from the base iterator, inspired by `itertools`.
+- `String` now implements `FromParallelIterator<Cow<str>>` and
+  `ParallelExtend<Cow<str>>`, inspired by `std`.
+- `()` now implements `FromParallelIterator<()>`, inspired by `std`.
+- The new `ThreadPoolBuilder` replaces and deprecates `Configuration`.
+  - Errors from initialization now have the concrete `ThreadPoolBuildError`
+    type, rather than `Box<Error>`, and this type implements `Send` and `Sync`.
+  - `ThreadPool::new` is deprecated in favor of `ThreadPoolBuilder::build`.
+  - `initialize` is deprecated in favor of `ThreadPoolBuilder::build_global`.
+- Examples have been added to most of the parallel iterator methods.
+- A lot of the documentation has been reorganized and extended.
+
+## Breaking changes
+
+- Rayon now requires rustc 1.13 or greater.
+- `IndexedParallelIterator::len` and `ParallelIterator::opt_len` now operate on
+  `&self` instead of `&mut self`.
+- `IndexedParallelIterator::collect_into` is now `collect_into_vec`.
+- `IndexedParallelIterator::unzip_into` is now `unzip_into_vecs`.
+- Rayon no longer exports the deprecated `Configuration` and `initialize` from
+  rayon-core.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @Bilkow
+- @cuviper
+- @Enet4
+- @ignatenkobrain
+- @iwillspeak
+- @jeehoonkang
+- @jwass
+- @Kerollmops
+- @KodrAus
+- @kornelski
+- @MaloJaffre
+- @nikomatsakis
+- @obv-mikhail
+- @oddg
+- @phimuemue
+- @stjepang
+- @tmccombs
+- bors[bot]
+
+
+# Release rayon 0.9.0 / rayon-core 1.3.0 / rayon-futures 0.1.0 (2017-11-09)
+
+- `Configuration` now has a `build` method.
+- `ParallelIterator` added `flatten` and `intersperse`, both inspired by
+  itertools.
+- `IndexedParallelIterator` added `interleave`, `interleave_shortest`, and
+  `zip_eq`, all inspired by itertools.
+- The new functions `iter::empty` and `once` create parallel iterators of
+  exactly zero or one item, like their `std` counterparts.
+- The new functions `iter::repeat` and `repeatn` create parallel iterators
+  repeating an item indefinitely or `n` times, respectively.
+- The new function `join_context` works like `join`, with an added `FnContext`
+  parameter that indicates whether the job was stolen.
+- `Either` (used by `ParallelIterator::partition_map`) is now re-exported from
+  the `either` crate, instead of defining our own type.
+  - `Either` also now implements `ParallelIterator`, `IndexedParallelIterator`,
+    and `ParallelExtend` when both of its `Left` and `Right` types do.
+- All public types now implement `Debug`.
+- Many of the parallel iterators now implement `Clone` where possible.
+- Much of the documentation has been extended. (but still could use more help!)
+- All rayon crates have improved metadata.
+- Rayon was evaluated in the Libz Blitz, leading to many of these improvements.
+- Rayon pull requests are now guarded by bors-ng.
+
+## Futures
+
+The `spawn_future()` method has been refactored into its own `rayon-futures`
+crate, now through a `ScopeFutureExt` trait for `ThreadPool` and `Scope`.  The
+supporting `rayon-core` APIs are still gated by `--cfg rayon_unstable`.
+
+## Breaking changes
+
+- Two breaking changes have been made to `rayon-core`, but since they're fixing
+  soundness bugs, we are considering these _minor_ changes for semver.
+  - `Scope::spawn` now requires `Send` for the closure.
+  - `ThreadPool::install` now requires `Send` for the return value.
+- The `iter::internal` module has been renamed to `iter::plumbing`, to hopefully
+  indicate that while these are low-level details, they're not really internal
+  or private to rayon.  The contents of that module are needed for third-parties
+  to implement new parallel iterators, and we'll treat them with normal semver
+  stability guarantees.
+- The function `rayon::iter::split` is no longer re-exported as `rayon::split`.
+
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @AndyGauge
+- @ChristopherDavenport
+- @chrisvittal
+- @cuviper
+- @dns2utf8
+- @dtolnay
+- @frewsxcv
+- @gsquire
+- @Hittherhod
+- @jdr023
+- @laumann
+- @leodasvacas
+- @lvillani
+- @MajorBreakfast
+- @mamuleanu
+- @marmistrz
+- @mbrubeck
+- @mgattozzi
+- @nikomatsakis
+- @smt923
+- @stjepang
+- @tmccombs
+- @vishalsodani
+- bors[bot]
+
+
+# Release rayon 0.8.2 (2017-06-28)
+
+- `ParallelSliceMut` now has six parallel sorting methods with the same
+  variations as the standard library.
+  - `par_sort`, `par_sort_by`, and `par_sort_by_key` perform stable sorts in
+    parallel, using the default order, a custom comparator, or a key extraction
+    function, respectively.
+  - `par_sort_unstable`, `par_sort_unstable_by`, and `par_sort_unstable_by_key`
+    perform unstable sorts with the same comparison options.
+  - Thanks to @stjepang!
+
+
+# Release rayon 0.8.1 / rayon-core 1.2.0 (2017-06-14)
+
+- The following core APIs are being stabilized:
+  - `rayon::spawn()` -- spawns a task into the Rayon threadpool; as it
+    is contained in the global scope (rather than a user-created
+    scope), the task cannot capture anything from the current stack
+    frame.
+  - `ThreadPool::join()`, `ThreadPool::spawn()`, `ThreadPool::scope()`
+    -- convenience APIs for launching new work within a thread-pool.
+- The various iterator adapters are now tagged with `#[must_use]`
+- Parallel iterators now offer a `for_each_with` adapter, similar to
+  `map_with`.
+- We are adopting a new approach to handling the remaining unstable
+  APIs (which primarily pertain to futures integration). As awlays,
+  unstable APIs are intended for experimentation, but do not come with
+  any promise of compatibility (in other words, we might change them
+  in arbitrary ways in any release). Previously, we designated such
+  APIs using a Cargo feature "unstable". Now, we are using a regular
+  `#[cfg]` flag. This means that to see the unstable APIs, you must do
+  `RUSTFLAGS='--cfg rayon_unstable' cargo build`. This is
+  intentionally inconvenient; in particular, if you are a library,
+  then your clients must also modify their environment, signaling
+  their agreement to instability.
+
+
+# Release rayon 0.8.0 / rayon-core 1.1.0 (2017-06-13)
+
+## Rayon 0.8.0
+
+- Added the `map_with` and `fold_with` combinators, which help for
+  passing along state (like channels) that cannot be shared between
+  threads but which can be cloned on each thread split.
+- Added the `while_some` combinator, which helps for writing short-circuiting iterators.
+- Added support for "short-circuiting" collection: e.g., collecting
+  from an iterator producing `Option<T>` or `Result<T, E>` into a
+  `Option<Collection<T>>` or `Result<Collection<T>, E>`.
+- Support `FromParallelIterator` for `Cow`.
+- Removed the deprecated weight APIs.
+- Simplified the parallel iterator trait hierarchy by removing the
+  `BoundedParallelIterator` and `ExactParallelIterator` traits,
+  which were not serving much purpose.
+- Improved documentation.
+- Added some missing `Send` impls.
+- Fixed some small bugs.
+
+## Rayon-core 1.1.0
+
+- We now have more documentation.
+- Renamed the (unstable) methods `spawn_async` and
+  `spawn_future_async` -- which spawn tasks that cannot hold
+  references -- to simply `spawn` and `spawn_future`, respectively.
+- We are now using the coco library for our deque.
+- Individual threadpools can now be configured in "breadth-first"
+  mode, which causes them to execute spawned tasks in the reverse
+  order that they used to.  In some specific scenarios, this can be a
+  win (though it is not generally the right choice).
+- Added top-level functions:
+  - `current_thread_index`, for querying the index of the current worker thread within
+    its thread-pool (previously available as `thread_pool.current_thread_index()`);
+  - `current_thread_has_pending_tasks`, for querying whether the
+    current worker that has an empty task deque or not. This can be
+    useful when deciding whether to spawn a task.
+- The environment variables for controlling Rayon are now
+  `RAYON_NUM_THREADS` and `RAYON_LOG`. The older variables (e.g.,
+  `RAYON_RS_NUM_CPUS` are still supported but deprecated).
+
+## Rayon-demo
+
+- Added a new game-of-life benchmark.
+
+## Contributors
+
+Thanks to the following contributors:
+
+- @ChristopherDavenport
+- @SuperFluffy
+- @antoinewdg
+- @crazymykl
+- @cuviper
+- @glandium
+- @julian-seward1
+- @leodasvacas
+- @leshow
+- @lilianmoraru
+- @mschmo
+- @nikomatsakis
+- @stjepang
+
+
+# Release rayon 0.7.1 / rayon-core 1.0.2 (2017-05-30)
+
+This release is a targeted performance fix for #343, an issue where
+rayon threads could sometimes enter into a spin loop where they would
+be unable to make progress until they are pre-empted.
+
+
+# Release rayon 0.7 / rayon-core 1.0 (2017-04-06)
+
+This release marks the first step towards Rayon 1.0. **For best
+performance, it is important that all Rayon users update to at least
+Rayon 0.7.** This is because, as of Rayon 0.7, we have taken steps to
+ensure that, no matter how many versions of rayon are actively in use,
+there will only be a single global scheduler. This is achieved via the
+`rayon-core` crate, which is being released at version 1.0, and which
+encapsulates the core schedule APIs like `join()`. (Note: the
+`rayon-core` crate is, to some degree, an implementation detail, and
+not intended to be imported directly; it's entire API surface is
+mirrored through the rayon crate.)
+
+We have also done a lot of work reorganizing the API for Rayon 0.7 in
+preparation for 1.0. The names of iterator types have been changed and
+reorganized (but few users are expected to be naming those types
+explicitly anyhow). In addition, a number of parallel iterator methods
+have been adjusted to match those in the standard iterator traits more
+closely. See the "Breaking Changes" section below for
+details.
+
+Finally, Rayon 0.7 includes a number of new features and new parallel
+iterator methods. **As of this release, Rayon's parallel iterators
+have officially reached parity with sequential iterators** -- that is,
+every sequential iterator method that makes any sense in parallel is
+supported in some capacity.
+
+### New features and methods
+
+- The internal `Producer` trait now features `fold_with`, which enables
+  better performance for some parallel iterators.
+- Strings now support `par_split()` and `par_split_whitespace()`.
+- The `Configuration` API is expanded and simplified:
+    - `num_threads(0)` no longer triggers an error
+    - you can now supply a closure to name the Rayon threads that get created
+      by using `Configuration::thread_name`.
+    - you can now inject code when Rayon threads start up and finish
+    - you can now set a custom panic handler to handle panics in various odd situations
+- Threadpools are now able to more gracefully put threads to sleep when not needed.
+- Parallel iterators now support `find_first()`, `find_last()`, `position_first()`,
+  and `position_last()`.
+- Parallel iterators now support `rev()`, which primarily affects subsequent calls
+  to `enumerate()`.
+- The `scope()` API is now considered stable (and part of `rayon-core`).
+- There is now a useful `rayon::split` function for creating custom
+  Rayon parallel iterators.
+- Parallel iterators now allow you to customize the min/max number of
+  items to be processed in a given thread. This mechanism replaces the
+  older `weight` mechanism, which is deprecated.
+- `sum()` and friends now use the standard `Sum` traits
+
+### Breaking changes
+
+In the move towards 1.0, there have been a number of minor breaking changes:
+
+- Configuration setters like `Configuration::set_num_threads()` lost the `set_` prefix,
+  and hence become something like `Configuration::num_threads()`.
+- `Configuration` getters are removed
+- Iterator types have been shuffled around and exposed more consistently:
+    - combinator types live in `rayon::iter`, e.g. `rayon::iter::Filter`
+    - iterators over various types live in a module named after their type,
+      e.g. `rayon::slice::Windows`
+- When doing a `sum()` or `product()`, type annotations are needed for the result
+  since it is now possible to have the resulting sum be of a type other than the value
+  you are iterating over (this mirrors sequential iterators).
+
+### Experimental features
+
+Experimental features require the use of the `unstable` feature. Their
+APIs may change or disappear entirely in future releases (even minor
+releases) and hence they should be avoided for production code.
+
+- We now have (unstable) support for futures integration. You can use
+  `Scope::spawn_future` or `rayon::spawn_future_async()`.
+- There is now a `rayon::spawn_async()` function for using the Rayon
+  threadpool to run tasks that do not have references to the stack.
+
+### Contributors
+
+Thanks to the following people for their contributions to this release:
+
+- @Aaronepower
+- @ChristopherDavenport
+- @bluss
+- @cuviper
+- @froydnj
+- @gaurikholkar
+- @hniksic
+- @leodasvacas
+- @leshow
+- @martinhath
+- @mbrubeck
+- @nikomatsakis
+- @pegomes
+- @schuster
+- @torkleyy
+
+
+# Release 0.6 (2016-12-21)
+
+This release includes a lot of progress towards the goal of parity
+with the sequential iterator API, though there are still a few methods
+that are not yet complete. If you'd like to help with that effort,
+[check out the milestone](https://github.com/nikomatsakis/rayon/issues?q=is%3Aopen+is%3Aissue+milestone%3A%22Parity+with+the+%60Iterator%60+trait%22)
+to see the remaining issues.
+
+**Announcement:** @cuviper has been added as a collaborator to the
+Rayon repository for all of his outstanding work on Rayon, which
+includes both internal refactoring and helping to shape the public
+API. Thanks @cuviper! Keep it up.
+
+- We now support `collect()` and not just `collect_with()`.
+  You can use `collect()` to build a number of collections,
+  including vectors, maps, and sets. Moreover, when building a vector
+  with `collect()`, you are no longer limited to exact parallel iterators.
+  Thanks @nikomatsakis, @cuviper!
+- We now support `skip()` and `take()` on parallel iterators.
+  Thanks @martinhath!
+- **Breaking change:** We now match the sequential APIs for `min()` and `max()`.
+  We also support `min_by_key()` and `max_by_key()`. Thanks @tapeinosyne!
+- **Breaking change:** The `mul()` method is now renamed to `product()`,
+  to match sequential iterators. Thanks @jonathandturner!
+- We now support parallel iterator over ranges on `u64` values. Thanks @cuviper!
+- We now offer a `par_chars()` method on strings for iterating over characters
+  in parallel. Thanks @cuviper!
+- We now have new demos: a traveling salesman problem solver as well as matrix
+  multiplication. Thanks @nikomatsakis, @edre!
+- We are now documenting our minimum rustc requirement (currently
+  v1.12.0).  We will attempt to maintain compatibility with rustc
+  stable v1.12.0 as long as it remains convenient, but if new features
+  are stabilized or added that would be helpful to Rayon, or there are
+  bug fixes that we need, we will bump to the most recent rustc. Thanks @cuviper!
+- The `reduce()` functionality now has better inlining.
+  Thanks @bluss!
+- The `join()` function now has some documentation. Thanks @gsquire!
+- The project source has now been fully run through rustfmt.
+  Thanks @ChristopherDavenport!
+- Exposed helper methods for accessing the current thread index.
+  Thanks @bholley!
+
+
+# Release 0.5 (2016-11-04)
+
+- **Breaking change:** The `reduce` method has been vastly
+  simplified, and `reduce_with_identity` has been deprecated.
+- **Breaking change:** The `fold` method has been changed. It used to
+  always reduce the values, but now instead it is a combinator that
+  returns a parallel iterator which can itself be reduced. See the
+  docs for more information.
+- The following parallel iterator combinators are now available (thanks @cuviper!):
+  - `find_any()`: similar to `find` on a sequential iterator,
+    but doesn't necessarily return the *first* matching item
+  - `position_any()`: similar to `position` on a sequential iterator,
+    but doesn't necessarily return the index of *first* matching item
+  - `any()`, `all()`: just like their sequential counterparts
+- The `count()` combinator is now available for parallel iterators.
+- We now build with older versions of rustc again (thanks @durango!),
+  as we removed a stray semicolon from `thread_local!`.
+- Various improvements to the (unstable) `scope()` API implementation.
+
+
+# Release 0.4.3 (2016-10-25)
+
+- Parallel iterators now offer an adaptive weight scheme,
+  which means that explicit weights should no longer
+  be necessary in most cases! Thanks @cuviper!
+  - We are considering removing weights or changing the weight mechanism
+    before 1.0. Examples of scenarios where you still need weights even
+    with this adaptive mechanism would be great. Join the discussion
+    at <https://github.com/nikomatsakis/rayon/issues/111>.
+- New (unstable) scoped threads API, see `rayon::scope` for details.
+  - You will need to supply the [cargo feature] `unstable`.
+- The various demos and benchmarks have been consolidated into one
+  program, `rayon-demo`.
+- Optimizations in Rayon's inner workings. Thanks @emilio!
+- Update `num_cpus` to 1.0. Thanks @jamwt!
+- Various internal cleanup in the implementation and typo fixes.
+  Thanks @cuviper, @Eh2406, and @spacejam!
+
+[cargo feature]: http://doc.crates.io/manifest.html#the-features-section
+
+
+# Release 0.4.2 (2016-09-15)
+
+- Updated crates.io metadata.
+
+
+# Release 0.4.1 (2016-09-14)
+
+- New `chain` combinator for parallel iterators.
+- `Option`, `Result`, as well as many more collection types now have
+  parallel iterators.
+- New mergesort demo.
+- Misc fixes.
+
+Thanks to @cuviper, @edre, @jdanford, @frewsxcv for their contributions!
+
+
+# Release 0.4 (2016-05-16)
+
+- Make use of latest versions of catch-panic and various fixes to panic propagation.
+- Add new prime sieve demo.
+- Add `cloned()` and `inspect()` combinators.
+- Misc fixes for Rust RFC 1214.
+
+Thanks to @areilb1, @Amanieu, @SharplEr, and @cuviper for their contributions!
+
+
+# Release 0.3 (2016-02-23)
+
+- Expanded `par_iter` APIs now available:
+  - `into_par_iter` is now supported on vectors (taking ownership of the elements)
+- Panic handling is much improved:
+  - if you use the Nightly feature, experimental panic recovery is available
+  - otherwise, panics propagate out and poision the workpool
+- New `Configuration` object to control number of threads and other details
+- New demos and benchmarks
+  - try `cargo run --release -- visualize` in `demo/nbody` :)
+    - Note: a nightly compiler is required for this demo due to the
+      use of the `+=` syntax
+
+Thanks to @bjz, @cuviper, @Amanieu, and @willi-kappler for their contributions!
+
+
+# Release 0.2 and earlier
+
+No release notes were being kept at this time.
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..70e301b
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,6 @@
+fn main() {
+    let ac = autocfg::new();
+    if ac.probe_expression("(0..10).step_by(2).rev()") {
+        autocfg::emit("step_by");
+    }
+}
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..ec08a4d
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,3 @@
+We use this directory for interactive tests that can't be run in an
+automatic fashion. For examples of how to use Rayon, or benchmarks,
+see `rayon-demo`.
diff --git a/examples/cpu_monitor.rs b/examples/cpu_monitor.rs
new file mode 100644
index 0000000..bbe1316
--- /dev/null
+++ b/examples/cpu_monitor.rs
@@ -0,0 +1,81 @@
+use docopt::Docopt;
+use std::io;
+use std::process;
+
+const USAGE: &str = "
+Usage: cpu_monitor [options] <scenario>
+       cpu_monitor --help
+
+A test for monitoring how much CPU usage Rayon consumes under various
+scenarios. This test is intended to be executed interactively, like so:
+
+    cargo run --example cpu_monitor -- tasks_ended
+
+The list of scenarios you can try are as follows:
+
+- tasks_ended: after all tasks have finished, go to sleep
+- task_stall_root: a root task stalls for a very long time
+- task_stall_scope: a task in a scope stalls for a very long time
+
+Options:
+    -h, --help                   Show this message.
+    -d N, --depth N              Control how hard the dummy task works [default: 27]
+";
+
+#[derive(serde::Deserialize)]
+pub struct Args {
+    arg_scenario: String,
+    flag_depth: usize,
+}
+
+fn main() {
+    let args: &Args = &Docopt::new(USAGE)
+        .and_then(|d| d.deserialize())
+        .unwrap_or_else(|e| e.exit());
+
+    match &args.arg_scenario[..] {
+        "tasks_ended" => tasks_ended(args),
+        "task_stall_root" => task_stall_root(args),
+        "task_stall_scope" => task_stall_scope(args),
+        _ => {
+            println!("unknown scenario: `{}`", args.arg_scenario);
+            println!("try --help");
+            process::exit(1);
+        }
+    }
+}
+
+fn wait_for_user() {
+    let mut input = String::new();
+    io::stdin().read_line(&mut input).unwrap();
+}
+
+fn task(args: &Args) {
+    fn join_recursively(n: usize) {
+        if n == 0 {
+            return;
+        }
+        rayon::join(|| join_recursively(n - 1), || join_recursively(n - 1));
+    }
+
+    println!("Starting heavy work at depth {}...wait.", args.flag_depth);
+    join_recursively(args.flag_depth);
+    println!("Heavy work done; check top. You should see CPU usage drop to zero soon.");
+    println!("Press <enter> to quit...");
+}
+
+fn tasks_ended(args: &Args) {
+    task(args);
+    wait_for_user();
+}
+
+fn task_stall_root(args: &Args) {
+    rayon::join(|| task(args), wait_for_user);
+}
+
+fn task_stall_scope(args: &Args) {
+    rayon::scope(|scope| {
+        scope.spawn(move |_| task(args));
+        scope.spawn(move |_| wait_for_user());
+    });
+}
diff --git a/src/collections/binary_heap.rs b/src/collections/binary_heap.rs
new file mode 100644
index 0000000..fa90312
--- /dev/null
+++ b/src/collections/binary_heap.rs
@@ -0,0 +1,120 @@
+//! This module contains the parallel iterator types for heaps
+//! (`BinaryHeap<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BinaryHeap;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a binary heap
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Ord + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+impl<T: Ord + Send> IntoParallelIterator for BinaryHeap<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter {
+            inner: Vec::from(self).into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Ord + Send>
+}
+
+/// Parallel iterator over an immutable reference to a binary heap
+#[derive(Debug)]
+pub struct Iter<'a, T: Ord + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Ord + Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BinaryHeap<T> => Iter<'a, T>,
+    impl<'a, T: Ord + Sync>
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Ord + Sync + 'a>
+}
+
+// `BinaryHeap` doesn't have a mutable `Iterator`
+
+/// Draining parallel iterator that moves out of a binary heap,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Ord + Send> {
+    heap: &'a mut BinaryHeap<T>,
+}
+
+impl<'a, T: Ord + Send> ParallelDrainFull for &'a mut BinaryHeap<T> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain(self) -> Self::Iter {
+        Drain { heap: self }
+    }
+}
+
+impl<'a, T: Ord + Send> ParallelIterator for Drain<'a, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'a, T: Ord + Send> IndexedParallelIterator for Drain<'a, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.heap.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        super::DrainGuard::new(self.heap)
+            .par_drain(..)
+            .with_producer(callback)
+    }
+}
+
+impl<'a, T: Ord + Send> Drop for Drain<'a, T> {
+    fn drop(&mut self) {
+        if !self.heap.is_empty() {
+            // We must not have produced, so just call a normal drain to remove the items.
+            self.heap.drain();
+        }
+    }
+}
diff --git a/src/collections/btree_map.rs b/src/collections/btree_map.rs
new file mode 100644
index 0000000..12436dc
--- /dev/null
+++ b/src/collections/btree_map.rs
@@ -0,0 +1,66 @@
+//! This module contains the parallel iterator types for B-Tree maps
+//! (`BTreeMap<K, V>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BTreeMap;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a B-Tree map
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<K: Ord + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+}
+
+into_par_vec! {
+    BTreeMap<K, V> => IntoIter<K, V>,
+    impl<K: Ord + Send, V: Send>
+}
+
+delegate_iterator! {
+    IntoIter<K, V> => (K, V),
+    impl<K: Ord + Send, V: Send>
+}
+
+/// Parallel iterator over an immutable reference to a B-Tree map
+#[derive(Debug)]
+pub struct Iter<'a, K: Ord + Sync, V: Sync> {
+    inner: vec::IntoIter<(&'a K, &'a V)>,
+}
+
+impl<'a, K: Ord + Sync, V: Sync> Clone for Iter<'a, K, V> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BTreeMap<K, V> => Iter<'a, K, V>,
+    impl<'a, K: Ord + Sync, V: Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, K, V> => (&'a K, &'a V),
+    impl<'a, K: Ord + Sync + 'a, V: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a B-Tree map
+#[derive(Debug)]
+pub struct IterMut<'a, K: Ord + Sync, V: Send> {
+    inner: vec::IntoIter<(&'a K, &'a mut V)>,
+}
+
+into_par_vec! {
+    &'a mut BTreeMap<K, V> => IterMut<'a, K, V>,
+    impl<'a, K: Ord + Sync, V: Send>
+}
+
+delegate_iterator! {
+    IterMut<'a, K, V> => (&'a K, &'a mut V),
+    impl<'a, K: Ord + Sync + 'a, V: Send + 'a>
+}
diff --git a/src/collections/btree_set.rs b/src/collections/btree_set.rs
new file mode 100644
index 0000000..061d37c
--- /dev/null
+++ b/src/collections/btree_set.rs
@@ -0,0 +1,52 @@
+//! This module contains the parallel iterator types for B-Tree sets
+//! (`BTreeSet<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::BTreeSet;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a B-Tree set
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<T: Ord + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    BTreeSet<T> => IntoIter<T>,
+    impl<T: Ord + Send>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Ord + Send>
+}
+
+/// Parallel iterator over an immutable reference to a B-Tree set
+#[derive(Debug)]
+pub struct Iter<'a, T: Ord + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Ord + Sync + 'a> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a BTreeSet<T> => Iter<'a, T>,
+    impl<'a, T: Ord + Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Ord + Sync + 'a>
+}
+
+// `BTreeSet` doesn't have a mutable `Iterator`
diff --git a/src/collections/hash_map.rs b/src/collections/hash_map.rs
new file mode 100644
index 0000000..b657851
--- /dev/null
+++ b/src/collections/hash_map.rs
@@ -0,0 +1,96 @@
+//! This module contains the parallel iterator types for hash maps
+//! (`HashMap<K, V>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::HashMap;
+use std::hash::{BuildHasher, Hash};
+use std::marker::PhantomData;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a hash map
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<K: Hash + Eq + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+}
+
+into_par_vec! {
+    HashMap<K, V, S> => IntoIter<K, V>,
+    impl<K: Hash + Eq + Send, V: Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IntoIter<K, V> => (K, V),
+    impl<K: Hash + Eq + Send, V: Send>
+}
+
+/// Parallel iterator over an immutable reference to a hash map
+#[derive(Debug)]
+pub struct Iter<'a, K: Hash + Eq + Sync, V: Sync> {
+    inner: vec::IntoIter<(&'a K, &'a V)>,
+}
+
+impl<'a, K: Hash + Eq + Sync, V: Sync> Clone for Iter<'a, K, V> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a HashMap<K, V, S> => Iter<'a, K, V>,
+    impl<'a, K: Hash + Eq + Sync, V: Sync, S: BuildHasher>
+}
+
+delegate_iterator! {
+    Iter<'a, K, V> => (&'a K, &'a V),
+    impl<'a, K: Hash + Eq + Sync + 'a, V: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a hash map
+#[derive(Debug)]
+pub struct IterMut<'a, K: Hash + Eq + Sync, V: Send> {
+    inner: vec::IntoIter<(&'a K, &'a mut V)>,
+}
+
+into_par_vec! {
+    &'a mut HashMap<K, V, S> => IterMut<'a, K, V>,
+    impl<'a, K: Hash + Eq + Sync, V: Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IterMut<'a, K, V> => (&'a K, &'a mut V),
+    impl<'a, K: Hash + Eq + Sync + 'a, V: Send + 'a>
+}
+
+/// Draining parallel iterator that moves out of a hash map,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, K: Hash + Eq + Send, V: Send> {
+    inner: vec::IntoIter<(K, V)>,
+    marker: PhantomData<&'a mut HashMap<K, V>>,
+}
+
+impl<'a, K: Hash + Eq + Send, V: Send, S: BuildHasher> ParallelDrainFull
+    for &'a mut HashMap<K, V, S>
+{
+    type Iter = Drain<'a, K, V>;
+    type Item = (K, V);
+
+    fn par_drain(self) -> Self::Iter {
+        let vec: Vec<_> = self.drain().collect();
+        Drain {
+            inner: vec.into_par_iter(),
+            marker: PhantomData,
+        }
+    }
+}
+
+delegate_iterator! {
+    Drain<'_, K, V> => (K, V),
+    impl<K: Hash + Eq + Send, V: Send>
+}
diff --git a/src/collections/hash_set.rs b/src/collections/hash_set.rs
new file mode 100644
index 0000000..b6ee1c1
--- /dev/null
+++ b/src/collections/hash_set.rs
@@ -0,0 +1,80 @@
+//! This module contains the parallel iterator types for hash sets
+//! (`HashSet<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::HashSet;
+use std::hash::{BuildHasher, Hash};
+use std::marker::PhantomData;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a hash set
+#[derive(Debug)] // std doesn't Clone
+pub struct IntoIter<T: Hash + Eq + Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    HashSet<T, S> => IntoIter<T>,
+    impl<T: Hash + Eq + Send, S: BuildHasher>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Hash + Eq + Send>
+}
+
+/// Parallel iterator over an immutable reference to a hash set
+#[derive(Debug)]
+pub struct Iter<'a, T: Hash + Eq + Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Hash + Eq + Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a HashSet<T, S> => Iter<'a, T>,
+    impl<'a, T: Hash + Eq + Sync, S: BuildHasher>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Hash + Eq + Sync + 'a>
+}
+
+// `HashSet` doesn't have a mutable `Iterator`
+
+/// Draining parallel iterator that moves out of a hash set,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Hash + Eq + Send> {
+    inner: vec::IntoIter<T>,
+    marker: PhantomData<&'a mut HashSet<T>>,
+}
+
+impl<'a, T: Hash + Eq + Send, S: BuildHasher> ParallelDrainFull for &'a mut HashSet<T, S> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain(self) -> Self::Iter {
+        let vec: Vec<_> = self.drain().collect();
+        Drain {
+            inner: vec.into_par_iter(),
+            marker: PhantomData,
+        }
+    }
+}
+
+delegate_iterator! {
+    Drain<'_, T> => T,
+    impl<T: Hash + Eq + Send>
+}
diff --git a/src/collections/linked_list.rs b/src/collections/linked_list.rs
new file mode 100644
index 0000000..bddd2b0
--- /dev/null
+++ b/src/collections/linked_list.rs
@@ -0,0 +1,66 @@
+//! This module contains the parallel iterator types for linked lists
+//! (`LinkedList<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::LinkedList;
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use crate::vec;
+
+/// Parallel iterator over a linked list
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: vec::IntoIter<T>,
+}
+
+into_par_vec! {
+    LinkedList<T> => IntoIter<T>,
+    impl<T: Send>
+}
+
+delegate_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a linked list
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: vec::IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+into_par_vec! {
+    &'a LinkedList<T> => Iter<'a, T>,
+    impl<'a, T: Sync>
+}
+
+delegate_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a linked list
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: vec::IntoIter<&'a mut T>,
+}
+
+into_par_vec! {
+    &'a mut LinkedList<T> => IterMut<'a, T>,
+    impl<'a, T: Send>
+}
+
+delegate_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
diff --git a/src/collections/mod.rs b/src/collections/mod.rs
new file mode 100644
index 0000000..d9b7988
--- /dev/null
+++ b/src/collections/mod.rs
@@ -0,0 +1,84 @@
+//! Parallel iterator types for [standard collections][std::collections]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::collections]: https://doc.rust-lang.org/stable/std/collections/
+
+/// Convert an iterable collection into a parallel iterator by first
+/// collecting into a temporary `Vec`, then iterating that.
+macro_rules! into_par_vec {
+    ($t:ty => $iter:ident<$($i:tt),*>, impl $($args:tt)*) => {
+        impl $($args)* IntoParallelIterator for $t {
+            type Item = <$t as IntoIterator>::Item;
+            type Iter = $iter<$($i),*>;
+
+            fn into_par_iter(self) -> Self::Iter {
+                use std::iter::FromIterator;
+                $iter { inner: Vec::from_iter(self).into_par_iter() }
+            }
+        }
+    };
+}
+
+pub mod binary_heap;
+pub mod btree_map;
+pub mod btree_set;
+pub mod hash_map;
+pub mod hash_set;
+pub mod linked_list;
+pub mod vec_deque;
+
+use self::drain_guard::DrainGuard;
+
+mod drain_guard {
+    use crate::iter::ParallelDrainRange;
+    use std::mem;
+    use std::ops::RangeBounds;
+
+    /// A proxy for draining a collection by converting to a `Vec` and back.
+    ///
+    /// This is used for draining `BinaryHeap` and `VecDeque`, which both have
+    /// zero-allocation conversions to/from `Vec`, though not zero-cost:
+    /// - `BinaryHeap` will heapify from `Vec`, but at least that will be empty.
+    /// - `VecDeque` has to shift items to offset 0 when converting to `Vec`.
+    #[allow(missing_debug_implementations)]
+    pub(super) struct DrainGuard<'a, T, C: From<Vec<T>>> {
+        collection: &'a mut C,
+        vec: Vec<T>,
+    }
+
+    impl<'a, T, C> DrainGuard<'a, T, C>
+    where
+        C: Default + From<Vec<T>>,
+        Vec<T>: From<C>,
+    {
+        pub(super) fn new(collection: &'a mut C) -> Self {
+            Self {
+                // Temporarily steal the inner `Vec` so we can drain in place.
+                vec: Vec::from(mem::replace(collection, C::default())),
+                collection,
+            }
+        }
+    }
+
+    impl<'a, T, C: From<Vec<T>>> Drop for DrainGuard<'a, T, C> {
+        fn drop(&mut self) {
+            // Restore the collection from the `Vec` with its original capacity.
+            *self.collection = C::from(mem::replace(&mut self.vec, Vec::new()));
+        }
+    }
+
+    impl<'a, T, C> ParallelDrainRange<usize> for &'a mut DrainGuard<'_, T, C>
+    where
+        T: Send,
+        C: From<Vec<T>>,
+    {
+        type Iter = crate::vec::Drain<'a, T>;
+        type Item = T;
+
+        fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+            self.vec.par_drain(range)
+        }
+    }
+}
diff --git a/src/collections/vec_deque.rs b/src/collections/vec_deque.rs
new file mode 100644
index 0000000..f87ce6b
--- /dev/null
+++ b/src/collections/vec_deque.rs
@@ -0,0 +1,159 @@
+//! This module contains the parallel iterator types for double-ended queues
+//! (`VecDeque<T>`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use std::collections::VecDeque;
+use std::ops::{Range, RangeBounds};
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::simplify_range;
+
+use crate::slice;
+use crate::vec;
+
+/// Parallel iterator over a double-ended queue
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: vec::IntoIter<T>,
+}
+
+impl<T: Send> IntoParallelIterator for VecDeque<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        // NOTE: requires data movement if the deque doesn't start at offset 0.
+        let inner = Vec::from(self).into_par_iter();
+        IntoIter { inner }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a double-ended queue
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: Chain<slice::Iter<'a, T>, slice::Iter<'a, T>>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync> IntoParallelIterator for &'a VecDeque<T> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        let (a, b) = self.as_slices();
+        Iter {
+            inner: a.into_par_iter().chain(b),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a double-ended queue
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: Chain<slice::IterMut<'a, T>, slice::IterMut<'a, T>>,
+}
+
+impl<'a, T: Send> IntoParallelIterator for &'a mut VecDeque<T> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        let (a, b) = self.as_mut_slices();
+        IterMut {
+            inner: a.into_par_iter().chain(b),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Draining parallel iterator that moves a range out of a double-ended queue,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a, T: Send> {
+    deque: &'a mut VecDeque<T>,
+    range: Range<usize>,
+    orig_len: usize,
+}
+
+impl<'a, T: Send> ParallelDrainRange<usize> for &'a mut VecDeque<T> {
+    type Iter = Drain<'a, T>;
+    type Item = T;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            orig_len: self.len(),
+            range: simplify_range(range, self.len()),
+            deque: self,
+        }
+    }
+}
+
+impl<'a, T: Send> ParallelIterator for Drain<'a, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'a, T: Send> IndexedParallelIterator for Drain<'a, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // NOTE: requires data movement if the deque doesn't start at offset 0.
+        super::DrainGuard::new(self.deque)
+            .par_drain(self.range.clone())
+            .with_producer(callback)
+    }
+}
+
+impl<'a, T: Send> Drop for Drain<'a, T> {
+    fn drop(&mut self) {
+        if self.deque.len() != self.orig_len - self.range.len() {
+            // We must not have produced, so just call a normal drain to remove the items.
+            assert_eq!(self.deque.len(), self.orig_len);
+            self.deque.drain(self.range.clone());
+        }
+    }
+}
diff --git a/src/compile_fail/cannot_collect_filtermap_data.rs b/src/compile_fail/cannot_collect_filtermap_data.rs
new file mode 100644
index 0000000..65ff875
--- /dev/null
+++ b/src/compile_fail/cannot_collect_filtermap_data.rs
@@ -0,0 +1,16 @@
+/*! ```compile_fail,E0599
+
+use rayon::prelude::*;
+
+// zip requires data of exact size, but filter yields only bounded
+// size, so check that we cannot apply it.
+
+fn main() {
+    let a: Vec<usize> = (0..1024).collect();
+    let mut v = vec![];
+    a.par_iter()
+     .filter_map(|&x| Some(x as f32))
+     .collect_into_vec(&mut v); //~ ERROR no method
+}
+
+``` */
diff --git a/src/compile_fail/cannot_zip_filtered_data.rs b/src/compile_fail/cannot_zip_filtered_data.rs
new file mode 100644
index 0000000..de43fca
--- /dev/null
+++ b/src/compile_fail/cannot_zip_filtered_data.rs
@@ -0,0 +1,16 @@
+/*! ```compile_fail,E0277
+
+use rayon::prelude::*;
+
+// zip requires data of exact size, but filter yields only bounded
+// size, so check that we cannot apply it.
+
+fn main() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter()
+     .zip(b.par_iter().filter(|&&x| x > 3)); //~ ERROR
+}
+
+``` */
diff --git a/src/compile_fail/cell_par_iter.rs b/src/compile_fail/cell_par_iter.rs
new file mode 100644
index 0000000..4af04b1
--- /dev/null
+++ b/src/compile_fail/cell_par_iter.rs
@@ -0,0 +1,15 @@
+/*! ```compile_fail,E0277
+
+// Check that we can't use the par-iter API to access contents of a `Cell`.
+
+use rayon::prelude::*;
+use std::cell::Cell;
+
+fn main() {
+    let c = Cell::new(42_i32);
+    (0_i32..1024).into_par_iter()
+             .map(|_| c.get()) //~ ERROR E0277
+             .min();
+}
+
+``` */
diff --git a/src/compile_fail/mod.rs b/src/compile_fail/mod.rs
new file mode 100644
index 0000000..13209a4
--- /dev/null
+++ b/src/compile_fail/mod.rs
@@ -0,0 +1,7 @@
+// These modules contain `compile_fail` doc tests.
+mod cannot_collect_filtermap_data;
+mod cannot_zip_filtered_data;
+mod cell_par_iter;
+mod must_use;
+mod no_send_par_iter;
+mod rc_par_iter;
diff --git a/src/compile_fail/must_use.rs b/src/compile_fail/must_use.rs
new file mode 100644
index 0000000..ac50a62
--- /dev/null
+++ b/src/compile_fail/must_use.rs
@@ -0,0 +1,67 @@
+// Check that we are flagged for ignoring `must_use` parallel adaptors.
+// (unfortunately there's no error code for `unused_must_use`)
+
+macro_rules! must_use {
+    ($( $name:ident #[$expr:meta] )*) => {$(
+        /// First sanity check that the expression is OK.
+        ///
+        /// ```
+        /// #![deny(unused_must_use)]
+        ///
+        /// use rayon::prelude::*;
+        ///
+        /// let v: Vec<_> = (0..100).map(Some).collect();
+        /// let _ =
+        #[$expr]
+        /// ```
+        ///
+        /// Now trigger the `must_use`.
+        ///
+        /// ```compile_fail
+        /// #![deny(unused_must_use)]
+        ///
+        /// use rayon::prelude::*;
+        ///
+        /// let v: Vec<_> = (0..100).map(Some).collect();
+        #[$expr]
+        /// ```
+        mod $name {}
+    )*}
+}
+
+must_use! {
+    step_by             /** v.par_iter().step_by(2); */
+    chain               /** v.par_iter().chain(&v); */
+    chunks              /** v.par_iter().chunks(2); */
+    cloned              /** v.par_iter().cloned(); */
+    copied              /** v.par_iter().copied(); */
+    enumerate           /** v.par_iter().enumerate(); */
+    filter              /** v.par_iter().filter(|_| true); */
+    filter_map          /** v.par_iter().filter_map(|x| *x); */
+    flat_map            /** v.par_iter().flat_map(|x| *x); */
+    flat_map_iter       /** v.par_iter().flat_map_iter(|x| *x); */
+    flatten             /** v.par_iter().flatten(); */
+    flatten_iter        /** v.par_iter().flatten_iter(); */
+    fold                /** v.par_iter().fold(|| 0, |x, _| x); */
+    fold_with           /** v.par_iter().fold_with(0, |x, _| x); */
+    try_fold            /** v.par_iter().try_fold(|| 0, |x, _| Some(x)); */
+    try_fold_with       /** v.par_iter().try_fold_with(0, |x, _| Some(x)); */
+    inspect             /** v.par_iter().inspect(|_| {}); */
+    interleave          /** v.par_iter().interleave(&v); */
+    interleave_shortest /** v.par_iter().interleave_shortest(&v); */
+    intersperse         /** v.par_iter().intersperse(&None); */
+    map                 /** v.par_iter().map(|x| x); */
+    map_with            /** v.par_iter().map_with(0, |_, x| x); */
+    map_init            /** v.par_iter().map_init(|| 0, |_, x| x); */
+    panic_fuse          /** v.par_iter().panic_fuse(); */
+    positions           /** v.par_iter().positions(|_| true); */
+    rev                 /** v.par_iter().rev(); */
+    skip                /** v.par_iter().skip(1); */
+    take                /** v.par_iter().take(1); */
+    update              /** v.par_iter().update(|_| {}); */
+    while_some          /** v.par_iter().cloned().while_some(); */
+    with_max_len        /** v.par_iter().with_max_len(1); */
+    with_min_len        /** v.par_iter().with_min_len(1); */
+    zip                 /** v.par_iter().zip(&v); */
+    zip_eq              /** v.par_iter().zip_eq(&v); */
+}
diff --git a/src/compile_fail/no_send_par_iter.rs b/src/compile_fail/no_send_par_iter.rs
new file mode 100644
index 0000000..1362c98
--- /dev/null
+++ b/src/compile_fail/no_send_par_iter.rs
@@ -0,0 +1,64 @@
+// Check that `!Send` types fail early.
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .map(|&x| x) //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod map {}
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .filter_map(|&x| Some(x)) //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod filter_map {}
+
+/** ```compile_fail,E0277
+
+use rayon::prelude::*;
+use std::ptr::null;
+
+#[derive(Copy, Clone)]
+struct NoSend(*const ());
+
+unsafe impl Sync for NoSend {}
+
+fn main() {
+    let x = Some(NoSend(null()));
+
+    x.par_iter()
+        .cloned() //~ ERROR
+        .count(); //~ ERROR
+}
+
+``` */
+mod cloned {}
diff --git a/src/compile_fail/rc_par_iter.rs b/src/compile_fail/rc_par_iter.rs
new file mode 100644
index 0000000..feaedb3
--- /dev/null
+++ b/src/compile_fail/rc_par_iter.rs
@@ -0,0 +1,17 @@
+/*! ```compile_fail,E0599
+
+// Check that we can't use the par-iter API to access contents of an
+// `Rc`.
+
+use rayon::prelude::*;
+use std::rc::Rc;
+
+fn main() {
+    let x = vec![Rc::new(22), Rc::new(23)];
+    let mut y = vec![];
+    x.into_par_iter() //~ ERROR no method named `into_par_iter`
+     .map(|rc| *rc)
+     .collect_into_vec(&mut y);
+}
+
+``` */
diff --git a/src/delegate.rs b/src/delegate.rs
new file mode 100644
index 0000000..a537489
--- /dev/null
+++ b/src/delegate.rs
@@ -0,0 +1,70 @@
+//! Macros for delegating newtype iterators to inner types.
+
+// Note: these place `impl` bounds at the end, as token gobbling is the only way
+// I know how to consume an arbitrary list of constraints, with `$($args:tt)*`.
+
+/// Creates a parallel iterator implementation which simply wraps an inner type
+/// and delegates all methods inward.  The actual struct must already be
+/// declared with an `inner` field.
+///
+/// The implementation of `IntoParallelIterator` should be added separately.
+///
+/// # Example
+///
+/// ```
+/// delegate_iterator!{
+///     MyIntoIter<T, U> => (T, U),
+///     impl<T: Ord + Send, U: Send>
+/// }
+/// ```
+macro_rules! delegate_iterator {
+    ($iter:ty => $item:ty ,
+     impl $( $args:tt )*
+     ) => {
+        impl $( $args )* ParallelIterator for $iter {
+            type Item = $item;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+                where C: UnindexedConsumer<Self::Item>
+            {
+                self.inner.drive_unindexed(consumer)
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                self.inner.opt_len()
+            }
+        }
+    }
+}
+
+/// Creates an indexed parallel iterator implementation which simply wraps an
+/// inner type and delegates all methods inward.  The actual struct must already
+/// be declared with an `inner` field.
+macro_rules! delegate_indexed_iterator {
+    ($iter:ty => $item:ty ,
+     impl $( $args:tt )*
+     ) => {
+        delegate_iterator!{
+            $iter => $item ,
+            impl $( $args )*
+        }
+
+        impl $( $args )* IndexedParallelIterator for $iter {
+            fn drive<C>(self, consumer: C) -> C::Result
+                where C: Consumer<Self::Item>
+            {
+                self.inner.drive(consumer)
+            }
+
+            fn len(&self) -> usize {
+                self.inner.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+                where CB: ProducerCallback<Self::Item>
+            {
+                self.inner.with_producer(callback)
+            }
+        }
+    }
+}
diff --git a/src/iter/chain.rs b/src/iter/chain.rs
new file mode 100644
index 0000000..48fce07
--- /dev/null
+++ b/src/iter/chain.rs
@@ -0,0 +1,268 @@
+use super::plumbing::*;
+use super::*;
+use rayon_core::join;
+use std::cmp;
+use std::iter;
+
+/// `Chain` is an iterator that joins `b` after `a` in one continuous iterator.
+/// This struct is created by the [`chain()`] method on [`ParallelIterator`]
+///
+/// [`chain()`]: trait.ParallelIterator.html#method.chain
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    a: A,
+    b: B,
+}
+
+impl<A, B> Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    /// Creates a new `Chain` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        Chain { a, b }
+    }
+}
+
+impl<A, B> ParallelIterator for Chain<A, B>
+where
+    A: ParallelIterator,
+    B: ParallelIterator<Item = A::Item>,
+{
+    type Item = A::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let Chain { a, b } = self;
+
+        // If we returned a value from our own `opt_len`, then the collect consumer in particular
+        // will balk at being treated like an actual `UnindexedConsumer`.  But when we do know the
+        // length, we can use `Consumer::split_at` instead, and this is still harmless for other
+        // truly-unindexed consumers too.
+        let (left, right, reducer) = if let Some(len) = a.opt_len() {
+            consumer.split_at(len)
+        } else {
+            let reducer = consumer.to_reducer();
+            (consumer.split_off_left(), consumer, reducer)
+        };
+
+        let (a, b) = join(|| a.drive_unindexed(left), || b.drive_unindexed(right));
+        reducer.reduce(a, b)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.a.opt_len()?.checked_add(self.b.opt_len()?)
+    }
+}
+
+impl<A, B> IndexedParallelIterator for Chain<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator<Item = A::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let Chain { a, b } = self;
+        let (left, right, reducer) = consumer.split_at(a.len());
+        let (a, b) = join(|| a.drive(left), || b.drive(right));
+        reducer.reduce(a, b)
+    }
+
+    fn len(&self) -> usize {
+        self.a.len().checked_add(self.b.len()).expect("overflow")
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let a_len = self.a.len();
+        return self.a.with_producer(CallbackA {
+            callback,
+            a_len,
+            b: self.b,
+        });
+
+        struct CallbackA<CB, B> {
+            callback: CB,
+            a_len: usize,
+            b: B,
+        }
+
+        impl<CB, B> ProducerCallback<B::Item> for CallbackA<CB, B>
+        where
+            B: IndexedParallelIterator,
+            CB: ProducerCallback<B::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<A>(self, a_producer: A) -> Self::Output
+            where
+                A: Producer<Item = B::Item>,
+            {
+                self.b.with_producer(CallbackB {
+                    callback: self.callback,
+                    a_len: self.a_len,
+                    a_producer,
+                })
+            }
+        }
+
+        struct CallbackB<CB, A> {
+            callback: CB,
+            a_len: usize,
+            a_producer: A,
+        }
+
+        impl<CB, A> ProducerCallback<A::Item> for CallbackB<CB, A>
+        where
+            A: Producer,
+            CB: ProducerCallback<A::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<B>(self, b_producer: B) -> Self::Output
+            where
+                B: Producer<Item = A::Item>,
+            {
+                let producer = ChainProducer::new(self.a_len, self.a_producer, b_producer);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    a_len: usize,
+    a: A,
+    b: B,
+}
+
+impl<A, B> ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    fn new(a_len: usize, a: A, b: B) -> Self {
+        ChainProducer { a_len, a, b }
+    }
+}
+
+impl<A, B> Producer for ChainProducer<A, B>
+where
+    A: Producer,
+    B: Producer<Item = A::Item>,
+{
+    type Item = A::Item;
+    type IntoIter = ChainSeq<A::IntoIter, B::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        ChainSeq::new(self.a.into_iter(), self.b.into_iter())
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.a.min_len(), self.b.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.a.max_len(), self.b.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        if index <= self.a_len {
+            let a_rem = self.a_len - index;
+            let (a_left, a_right) = self.a.split_at(index);
+            let (b_left, b_right) = self.b.split_at(0);
+            (
+                ChainProducer::new(index, a_left, b_left),
+                ChainProducer::new(a_rem, a_right, b_right),
+            )
+        } else {
+            let (a_left, a_right) = self.a.split_at(self.a_len);
+            let (b_left, b_right) = self.b.split_at(index - self.a_len);
+            (
+                ChainProducer::new(self.a_len, a_left, b_left),
+                ChainProducer::new(0, a_right, b_right),
+            )
+        }
+    }
+
+    fn fold_with<F>(self, mut folder: F) -> F
+    where
+        F: Folder<A::Item>,
+    {
+        folder = self.a.fold_with(folder);
+        if folder.full() {
+            folder
+        } else {
+            self.b.fold_with(folder)
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Wrapper for Chain to implement ExactSizeIterator
+
+struct ChainSeq<A, B> {
+    chain: iter::Chain<A, B>,
+}
+
+impl<A, B> ChainSeq<A, B> {
+    fn new(a: A, b: B) -> ChainSeq<A, B>
+    where
+        A: ExactSizeIterator,
+        B: ExactSizeIterator<Item = A::Item>,
+    {
+        ChainSeq { chain: a.chain(b) }
+    }
+}
+
+impl<A, B> Iterator for ChainSeq<A, B>
+where
+    A: Iterator,
+    B: Iterator<Item = A::Item>,
+{
+    type Item = A::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.chain.next()
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.chain.size_hint()
+    }
+}
+
+impl<A, B> ExactSizeIterator for ChainSeq<A, B>
+where
+    A: ExactSizeIterator,
+    B: ExactSizeIterator<Item = A::Item>,
+{
+}
+
+impl<A, B> DoubleEndedIterator for ChainSeq<A, B>
+where
+    A: DoubleEndedIterator,
+    B: DoubleEndedIterator<Item = A::Item>,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.chain.next_back()
+    }
+}
diff --git a/src/iter/chunks.rs b/src/iter/chunks.rs
new file mode 100644
index 0000000..be5f84c
--- /dev/null
+++ b/src/iter/chunks.rs
@@ -0,0 +1,216 @@
+use std::cmp::min;
+
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `Chunks` is an iterator that groups elements of an underlying iterator.
+///
+/// This struct is created by the [`chunks()`] method on [`IndexedParallelIterator`]
+///
+/// [`chunks()`]: trait.IndexedParallelIterator.html#method.chunks
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    size: usize,
+    i: I,
+}
+
+impl<I> Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Chunks` iterator
+    pub(super) fn new(i: I, size: usize) -> Self {
+        Chunks { i, size }
+    }
+}
+
+impl<I> ParallelIterator for Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = Vec<I::Item>;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Vec<I::Item>>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Chunks<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.i.len(), self.size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.i.len();
+        return self.i.with_producer(Callback {
+            size: self.size,
+            len,
+            callback,
+        });
+
+        struct Callback<CB> {
+            size: usize,
+            len: usize,
+            callback: CB,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<Vec<T>>,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                self.callback.callback(ChunkProducer {
+                    chunk_size: self.size,
+                    len: self.len,
+                    base,
+                })
+            }
+        }
+    }
+}
+
+struct ChunkProducer<P>
+where
+    P: Producer,
+{
+    chunk_size: usize,
+    len: usize,
+    base: P,
+}
+
+impl<P> Producer for ChunkProducer<P>
+where
+    P: Producer,
+{
+    type Item = Vec<P::Item>;
+    type IntoIter = ChunkSeq<P>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        ChunkSeq {
+            chunk_size: self.chunk_size,
+            len: self.len,
+            inner: if self.len > 0 { Some(self.base) } else { None },
+        }
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = min(index * self.chunk_size, self.len);
+        let (left, right) = self.base.split_at(elem_index);
+        (
+            ChunkProducer {
+                chunk_size: self.chunk_size,
+                len: elem_index,
+                base: left,
+            },
+            ChunkProducer {
+                chunk_size: self.chunk_size,
+                len: self.len - elem_index,
+                base: right,
+            },
+        )
+    }
+
+    fn min_len(&self) -> usize {
+        div_round_up(self.base.min_len(), self.chunk_size)
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len() / self.chunk_size
+    }
+}
+
+struct ChunkSeq<P> {
+    chunk_size: usize,
+    len: usize,
+    inner: Option<P>,
+}
+
+impl<P> Iterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    type Item = Vec<P::Item>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let producer = self.inner.take()?;
+        if self.len > self.chunk_size {
+            let (left, right) = producer.split_at(self.chunk_size);
+            self.inner = Some(right);
+            self.len -= self.chunk_size;
+            Some(left.into_iter().collect())
+        } else {
+            debug_assert!(self.len > 0);
+            self.len = 0;
+            Some(producer.into_iter().collect())
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+}
+
+impl<P> ExactSizeIterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    #[inline]
+    fn len(&self) -> usize {
+        div_round_up(self.len, self.chunk_size)
+    }
+}
+
+impl<P> DoubleEndedIterator for ChunkSeq<P>
+where
+    P: Producer,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let producer = self.inner.take()?;
+        if self.len > self.chunk_size {
+            let mut size = self.len % self.chunk_size;
+            if size == 0 {
+                size = self.chunk_size;
+            }
+            let (left, right) = producer.split_at(self.len - size);
+            self.inner = Some(left);
+            self.len -= size;
+            Some(right.into_iter().collect())
+        } else {
+            debug_assert!(self.len > 0);
+            self.len = 0;
+            Some(producer.into_iter().collect())
+        }
+    }
+}
diff --git a/src/iter/cloned.rs b/src/iter/cloned.rs
new file mode 100644
index 0000000..8d5f420
--- /dev/null
+++ b/src/iter/cloned.rs
@@ -0,0 +1,223 @@
+use super::plumbing::*;
+use super::*;
+
+use std::iter;
+
+/// `Cloned` is an iterator that clones the elements of an underlying iterator.
+///
+/// This struct is created by the [`cloned()`] method on [`ParallelIterator`]
+///
+/// [`cloned()`]: trait.ParallelIterator.html#method.cloned
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Cloned<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Cloned<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Cloned` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Cloned { base }
+    }
+}
+
+impl<'a, T, I> ParallelIterator for Cloned<I>
+where
+    I: ParallelIterator<Item = &'a T>,
+    T: 'a + Clone + Send + Sync,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = ClonedConsumer::new(consumer);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<'a, T, I> IndexedParallelIterator for Cloned<I>
+where
+    I: IndexedParallelIterator<Item = &'a T>,
+    T: 'a + Clone + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = ClonedConsumer::new(consumer);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<'a, T, CB> ProducerCallback<&'a T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+            T: 'a + Clone + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = &'a T>,
+            {
+                let producer = ClonedProducer { base };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ClonedProducer<P> {
+    base: P,
+}
+
+impl<'a, T, P> Producer for ClonedProducer<P>
+where
+    P: Producer<Item = &'a T>,
+    T: 'a + Clone,
+{
+    type Item = T;
+    type IntoIter = iter::Cloned<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().cloned()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            ClonedProducer { base: left },
+            ClonedProducer { base: right },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(ClonedFolder { base: folder }).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct ClonedConsumer<C> {
+    base: C,
+}
+
+impl<C> ClonedConsumer<C> {
+    fn new(base: C) -> Self {
+        ClonedConsumer { base }
+    }
+}
+
+impl<'a, T, C> Consumer<&'a T> for ClonedConsumer<C>
+where
+    C: Consumer<T>,
+    T: 'a + Clone,
+{
+    type Folder = ClonedFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            ClonedConsumer::new(left),
+            ClonedConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ClonedFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<&'a T> for ClonedConsumer<C>
+where
+    C: UnindexedConsumer<T>,
+    T: 'a + Clone,
+{
+    fn split_off_left(&self) -> Self {
+        ClonedConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct ClonedFolder<F> {
+    base: F,
+}
+
+impl<'a, T, F> Folder<&'a T> for ClonedFolder<F>
+where
+    F: Folder<T>,
+    T: 'a + Clone,
+{
+    type Result = F::Result;
+
+    fn consume(self, item: &'a T) -> Self {
+        ClonedFolder {
+            base: self.base.consume(item.clone()),
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = &'a T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().cloned());
+        self
+    }
+
+    fn complete(self) -> F::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/collect/consumer.rs b/src/iter/collect/consumer.rs
new file mode 100644
index 0000000..689f29c
--- /dev/null
+++ b/src/iter/collect/consumer.rs
@@ -0,0 +1,159 @@
+use super::super::plumbing::*;
+use std::marker::PhantomData;
+use std::ptr;
+use std::slice;
+
+pub(super) struct CollectConsumer<'c, T: Send> {
+    /// A slice covering the target memory, not yet initialized!
+    target: &'c mut [T],
+}
+
+pub(super) struct CollectFolder<'c, T: Send> {
+    /// The folder writes into `result` and must extend the result
+    /// up to exactly this number of elements.
+    final_len: usize,
+
+    /// The current written-to part of our slice of the target
+    result: CollectResult<'c, T>,
+}
+
+impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
+    /// The target memory is considered uninitialized, and will be
+    /// overwritten without reading or dropping existing values.
+    pub(super) fn new(target: &'c mut [T]) -> Self {
+        CollectConsumer { target }
+    }
+}
+
+/// CollectResult represents an initialized part of the target slice.
+///
+/// This is a proxy owner of the elements in the slice; when it drops,
+/// the elements will be dropped, unless its ownership is released before then.
+#[must_use]
+pub(super) struct CollectResult<'c, T> {
+    start: *mut T,
+    len: usize,
+    invariant_lifetime: PhantomData<&'c mut &'c mut [T]>,
+}
+
+unsafe impl<'c, T> Send for CollectResult<'c, T> where T: Send {}
+
+impl<'c, T> CollectResult<'c, T> {
+    /// The current length of the collect result
+    pub(super) fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Release ownership of the slice of elements, and return the length
+    pub(super) fn release_ownership(mut self) -> usize {
+        let ret = self.len;
+        self.len = 0;
+        ret
+    }
+}
+
+impl<'c, T> Drop for CollectResult<'c, T> {
+    fn drop(&mut self) {
+        // Drop the first `self.len` elements, which have been recorded
+        // to be initialized by the folder.
+        unsafe {
+            ptr::drop_in_place(slice::from_raw_parts_mut(self.start, self.len));
+        }
+    }
+}
+
+impl<'c, T: Send + 'c> Consumer<T> for CollectConsumer<'c, T> {
+    type Folder = CollectFolder<'c, T>;
+    type Reducer = CollectReducer;
+    type Result = CollectResult<'c, T>;
+
+    fn split_at(self, index: usize) -> (Self, Self, CollectReducer) {
+        let CollectConsumer { target } = self;
+
+        // Produce new consumers. Normal slicing ensures that the
+        // memory range given to each consumer is disjoint.
+        let (left, right) = target.split_at_mut(index);
+        (
+            CollectConsumer::new(left),
+            CollectConsumer::new(right),
+            CollectReducer,
+        )
+    }
+
+    fn into_folder(self) -> CollectFolder<'c, T> {
+        // Create a folder that consumes values and writes them
+        // into target. The initial result has length 0.
+        CollectFolder {
+            final_len: self.target.len(),
+            result: CollectResult {
+                start: self.target.as_mut_ptr(),
+                len: 0,
+                invariant_lifetime: PhantomData,
+            },
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'c, T: Send + 'c> Folder<T> for CollectFolder<'c, T> {
+    type Result = CollectResult<'c, T>;
+
+    fn consume(mut self, item: T) -> CollectFolder<'c, T> {
+        if self.result.len >= self.final_len {
+            panic!("too many values pushed to consumer");
+        }
+
+        // Compute target pointer and write to it, and
+        // extend the current result by one element
+        unsafe {
+            self.result.start.add(self.result.len).write(item);
+            self.result.len += 1;
+        }
+
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        // NB: We don't explicitly check that the local writes were complete,
+        // but Collect will assert the total result length in the end.
+        self.result
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+/// Pretend to be unindexed for `special_collect_into_vec`,
+/// but we should never actually get used that way...
+impl<'c, T: Send + 'c> UnindexedConsumer<T> for CollectConsumer<'c, T> {
+    fn split_off_left(&self) -> Self {
+        unreachable!("CollectConsumer must be indexed!")
+    }
+    fn to_reducer(&self) -> Self::Reducer {
+        CollectReducer
+    }
+}
+
+/// CollectReducer combines adjacent chunks; the result must always
+/// be contiguous so that it is one combined slice.
+pub(super) struct CollectReducer;
+
+impl<'c, T> Reducer<CollectResult<'c, T>> for CollectReducer {
+    fn reduce(
+        self,
+        mut left: CollectResult<'c, T>,
+        right: CollectResult<'c, T>,
+    ) -> CollectResult<'c, T> {
+        // Merge if the CollectResults are adjacent and in left to right order
+        // else: drop the right piece now and total length will end up short in the end,
+        // when the correctness of the collected result is asserted.
+        if left.start.wrapping_add(left.len) == right.start {
+            left.len += right.release_ownership();
+        }
+        left
+    }
+}
diff --git a/src/iter/collect/mod.rs b/src/iter/collect/mod.rs
new file mode 100644
index 0000000..e18298e
--- /dev/null
+++ b/src/iter/collect/mod.rs
@@ -0,0 +1,171 @@
+use super::{IndexedParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
+use std::slice;
+
+mod consumer;
+use self::consumer::CollectConsumer;
+use self::consumer::CollectResult;
+use super::unzip::unzip_indexed;
+
+mod test;
+
+/// Collects the results of the exact iterator into the specified vector.
+///
+/// This is called by `IndexedParallelIterator::collect_into_vec`.
+pub(super) fn collect_into_vec<I, T>(pi: I, v: &mut Vec<T>)
+where
+    I: IndexedParallelIterator<Item = T>,
+    T: Send,
+{
+    v.truncate(0); // clear any old data
+    let len = pi.len();
+    Collect::new(v, len).with_consumer(|consumer| pi.drive(consumer));
+}
+
+/// Collects the results of the iterator into the specified vector.
+///
+/// Technically, this only works for `IndexedParallelIterator`, but we're faking a
+/// bit of specialization here until Rust can do that natively.  Callers are
+/// using `opt_len` to find the length before calling this, and only exact
+/// iterators will return anything but `None` there.
+///
+/// Since the type system doesn't understand that contract, we have to allow
+/// *any* `ParallelIterator` here, and `CollectConsumer` has to also implement
+/// `UnindexedConsumer`.  That implementation panics `unreachable!` in case
+/// there's a bug where we actually do try to use this unindexed.
+fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
+where
+    I: ParallelIterator<Item = T>,
+    T: Send,
+{
+    Collect::new(v, len).with_consumer(|consumer| pi.drive_unindexed(consumer));
+}
+
+/// Unzips the results of the exact iterator into the specified vectors.
+///
+/// This is called by `IndexedParallelIterator::unzip_into_vecs`.
+pub(super) fn unzip_into_vecs<I, A, B>(pi: I, left: &mut Vec<A>, right: &mut Vec<B>)
+where
+    I: IndexedParallelIterator<Item = (A, B)>,
+    A: Send,
+    B: Send,
+{
+    // clear any old data
+    left.truncate(0);
+    right.truncate(0);
+
+    let len = pi.len();
+    Collect::new(right, len).with_consumer(|right_consumer| {
+        let mut right_result = None;
+        Collect::new(left, len).with_consumer(|left_consumer| {
+            let (left_r, right_r) = unzip_indexed(pi, left_consumer, right_consumer);
+            right_result = Some(right_r);
+            left_r
+        });
+        right_result.unwrap()
+    });
+}
+
+/// Manage the collection vector.
+struct Collect<'c, T: Send> {
+    vec: &'c mut Vec<T>,
+    len: usize,
+}
+
+impl<'c, T: Send + 'c> Collect<'c, T> {
+    fn new(vec: &'c mut Vec<T>, len: usize) -> Self {
+        Collect { vec, len }
+    }
+
+    /// Create a consumer on the slice of memory we are collecting into.
+    ///
+    /// The consumer needs to be used inside the scope function, and the
+    /// complete collect result passed back.
+    ///
+    /// This method will verify the collect result, and panic if the slice
+    /// was not fully written into. Otherwise, in the successful case,
+    /// the vector is complete with the collected result.
+    fn with_consumer<F>(mut self, scope_fn: F)
+    where
+        F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
+    {
+        unsafe {
+            let slice = Self::reserve_get_tail_slice(&mut self.vec, self.len);
+            let result = scope_fn(CollectConsumer::new(slice));
+
+            // The CollectResult represents a contiguous part of the
+            // slice, that has been written to.
+            // On unwind here, the CollectResult will be dropped.
+            // If some producers on the way did not produce enough elements,
+            // partial CollectResults may have been dropped without
+            // being reduced to the final result, and we will see
+            // that as the length coming up short.
+            //
+            // Here, we assert that `slice` is fully initialized. This is
+            // checked by the following assert, which verifies if a
+            // complete CollectResult was produced; if the length is
+            // correct, it is necessarily covering the target slice.
+            // Since we know that the consumer cannot have escaped from
+            // `drive` (by parametricity, essentially), we know that any
+            // stores that will happen, have happened. Unless some code is buggy,
+            // that means we should have seen `len` total writes.
+            let actual_writes = result.len();
+            assert!(
+                actual_writes == self.len,
+                "expected {} total writes, but got {}",
+                self.len,
+                actual_writes
+            );
+
+            // Release the result's mutable borrow and "proxy ownership"
+            // of the elements, before the vector takes it over.
+            result.release_ownership();
+
+            let new_len = self.vec.len() + self.len;
+            self.vec.set_len(new_len);
+        }
+    }
+
+    /// Reserve space for `len` more elements in the vector,
+    /// and return a slice to the uninitialized tail of the vector
+    ///
+    /// Safety: The tail slice is uninitialized
+    unsafe fn reserve_get_tail_slice(vec: &mut Vec<T>, len: usize) -> &mut [T] {
+        // Reserve the new space.
+        vec.reserve(len);
+
+        // Get a correct borrow, then extend it for the newly added length.
+        let start = vec.len();
+        let slice = &mut vec[start..];
+        slice::from_raw_parts_mut(slice.as_mut_ptr(), len)
+    }
+}
+
+/// Extends a vector with items from a parallel iterator.
+impl<T> ParallelExtend<T> for Vec<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        // See the vec_collect benchmarks in rayon-demo for different strategies.
+        let par_iter = par_iter.into_par_iter();
+        match par_iter.opt_len() {
+            Some(len) => {
+                // When Rust gets specialization, we can get here for indexed iterators
+                // without relying on `opt_len`.  Until then, `special_extend()` fakes
+                // an unindexed mode on the promise that `opt_len()` is accurate.
+                special_extend(par_iter, len, self);
+            }
+            None => {
+                // This works like `extend`, but `Vec::append` is more efficient.
+                let list = super::extend::collect(par_iter);
+                self.reserve(super::extend::len(&list));
+                for mut vec in list {
+                    self.append(&mut vec);
+                }
+            }
+        }
+    }
+}
diff --git a/src/iter/collect/test.rs b/src/iter/collect/test.rs
new file mode 100644
index 0000000..00c16c4
--- /dev/null
+++ b/src/iter/collect/test.rs
@@ -0,0 +1,385 @@
+#![cfg(test)]
+#![allow(unused_assignments)]
+
+// These tests are primarily targeting "abusive" producers that will
+// try to drive the "collect consumer" incorrectly. These should
+// result in panics.
+
+use super::Collect;
+use crate::iter::plumbing::*;
+use rayon_core::join;
+
+use std::fmt;
+use std::panic;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread::Result as ThreadResult;
+
+/// Promises to produce 2 items, but then produces 3.  Does not do any
+/// splits at all.
+#[test]
+#[should_panic(expected = "too many values")]
+fn produce_too_many_items() {
+    let mut v = vec![];
+    Collect::new(&mut v, 2).with_consumer(|consumer| {
+        let mut folder = consumer.into_folder();
+        folder = folder.consume(22);
+        folder = folder.consume(23);
+        folder.consume(24);
+        unreachable!("folder does not complete")
+    });
+}
+
+/// Produces fewer items than promised. Does not do any
+/// splits at all.
+#[test]
+#[should_panic(expected = "expected 5 total writes, but got 2")]
+fn produce_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 5);
+    collect.with_consumer(|consumer| {
+        let mut folder = consumer.into_folder();
+        folder = folder.consume(22);
+        folder = folder.consume(23);
+        folder.complete()
+    });
+}
+
+// Complete is not called by the consumer. Hence,the collection vector is not fully initialized.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn left_produces_items_with_no_complete() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        right_folder.complete()
+    });
+}
+
+// Complete is not called by the right consumer. Hence,the
+// collection vector is not fully initialized.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn right_produces_items_with_no_complete() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        left_folder.complete()
+    });
+}
+
+// Complete is not called by the consumer. Hence,the collection vector is not fully initialized.
+#[test]
+fn produces_items_with_no_complete() {
+    let counter = DropCounter::default();
+    let mut v = vec![];
+    let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+        let collect = Collect::new(&mut v, 2);
+        collect.with_consumer(|consumer| {
+            let mut folder = consumer.into_folder();
+            folder = folder.consume(counter.element());
+            folder = folder.consume(counter.element());
+            panic!("folder does not complete");
+        });
+    }));
+    assert!(v.is_empty());
+    assert_is_panic_with_message(&panic_result, "folder does not complete");
+    counter.assert_drop_count();
+}
+
+// The left consumer produces too many items while the right
+// consumer produces correct number.
+#[test]
+#[should_panic(expected = "too many values")]
+fn left_produces_too_many_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1).consume(2);
+        right_folder = right_folder.consume(2).consume(3);
+        let _ = right_folder.complete();
+        unreachable!("folder does not complete");
+    });
+}
+
+// The right consumer produces too many items while the left
+// consumer produces correct number.
+#[test]
+#[should_panic(expected = "too many values")]
+fn right_produces_too_many_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3).consume(4);
+        let _ = left_folder.complete();
+        unreachable!("folder does not complete");
+    });
+}
+
+// The left consumer produces fewer items while the right
+// consumer produces correct number.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 1")]
+fn left_produces_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(left_result, right_result)
+    });
+}
+
+// The left and right consumer produce the correct number but
+// only left result is returned
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn only_left_result() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let _ = right_folder.complete();
+        left_result
+    });
+}
+
+// The left and right consumer produce the correct number but
+// only right result is returned
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn only_right_result() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let _ = left_folder.complete();
+        right_folder.complete()
+    });
+}
+
+// The left and right consumer produce the correct number but reduce
+// in the wrong order.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 2")]
+fn reducer_does_not_preserve_order() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2).consume(3);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(right_result, left_result)
+    });
+}
+
+// The right consumer produces fewer items while the left
+// consumer produces correct number.
+#[test]
+#[should_panic(expected = "expected 4 total writes, but got 3")]
+fn right_produces_fewer_items() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let mut left_folder = left_consumer.into_folder();
+        let mut right_folder = right_consumer.into_folder();
+        left_folder = left_folder.consume(0).consume(1);
+        right_folder = right_folder.consume(2);
+        let left_result = left_folder.complete();
+        let right_result = right_folder.complete();
+        reducer.reduce(left_result, right_result)
+    });
+}
+
+// The left consumer panics and the right stops short, like `panic_fuse()`.
+// We should get the left panic without finishing `Collect::with_consumer`.
+#[test]
+#[should_panic(expected = "left consumer panic")]
+fn left_panics() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let (left_result, right_result) = join(
+            || {
+                let mut left_folder = left_consumer.into_folder();
+                left_folder = left_folder.consume(0);
+                panic!("left consumer panic");
+            },
+            || {
+                let mut right_folder = right_consumer.into_folder();
+                right_folder = right_folder.consume(2);
+                right_folder.complete() // early return
+            },
+        );
+        reducer.reduce(left_result, right_result)
+    });
+    unreachable!();
+}
+
+// The right consumer panics and the left stops short, like `panic_fuse()`.
+// We should get the right panic without finishing `Collect::with_consumer`.
+#[test]
+#[should_panic(expected = "right consumer panic")]
+fn right_panics() {
+    let mut v = vec![];
+    let collect = Collect::new(&mut v, 4);
+    collect.with_consumer(|consumer| {
+        let reducer = consumer.to_reducer();
+        let (left_consumer, right_consumer, _) = consumer.split_at(2);
+        let (left_result, right_result) = join(
+            || {
+                let mut left_folder = left_consumer.into_folder();
+                left_folder = left_folder.consume(0);
+                left_folder.complete() // early return
+            },
+            || {
+                let mut right_folder = right_consumer.into_folder();
+                right_folder = right_folder.consume(2);
+                panic!("right consumer panic");
+            },
+        );
+        reducer.reduce(left_result, right_result)
+    });
+    unreachable!();
+}
+
+// The left consumer produces fewer items while the right
+// consumer produces correct number; check that created elements are dropped
+#[test]
+fn left_produces_fewer_items_drops() {
+    let counter = DropCounter::default();
+    let mut v = vec![];
+    let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+        let collect = Collect::new(&mut v, 4);
+        collect.with_consumer(|consumer| {
+            let reducer = consumer.to_reducer();
+            let (left_consumer, right_consumer, _) = consumer.split_at(2);
+            let mut left_folder = left_consumer.into_folder();
+            let mut right_folder = right_consumer.into_folder();
+            left_folder = left_folder.consume(counter.element());
+            right_folder = right_folder
+                .consume(counter.element())
+                .consume(counter.element());
+            let left_result = left_folder.complete();
+            let right_result = right_folder.complete();
+            reducer.reduce(left_result, right_result)
+        });
+    }));
+    assert!(v.is_empty());
+    assert_is_panic_with_message(&panic_result, "expected 4 total writes, but got 1");
+    counter.assert_drop_count();
+}
+
+/// This counter can create elements, and then count and verify
+/// the number of which have actually been dropped again.
+#[derive(Default)]
+struct DropCounter {
+    created: AtomicUsize,
+    dropped: AtomicUsize,
+}
+
+struct Element<'a>(&'a AtomicUsize);
+
+impl DropCounter {
+    fn created(&self) -> usize {
+        self.created.load(Ordering::SeqCst)
+    }
+
+    fn dropped(&self) -> usize {
+        self.dropped.load(Ordering::SeqCst)
+    }
+
+    fn element(&self) -> Element<'_> {
+        self.created.fetch_add(1, Ordering::SeqCst);
+        Element(&self.dropped)
+    }
+
+    fn assert_drop_count(&self) {
+        assert_eq!(
+            self.created(),
+            self.dropped(),
+            "Expected {} dropped elements, but found {}",
+            self.created(),
+            self.dropped()
+        );
+    }
+}
+
+impl<'a> Drop for Element<'a> {
+    fn drop(&mut self) {
+        self.0.fetch_add(1, Ordering::SeqCst);
+    }
+}
+
+/// Assert that the result from catch_unwind is a panic that contains expected message
+fn assert_is_panic_with_message<T>(result: &ThreadResult<T>, expected: &str)
+where
+    T: fmt::Debug,
+{
+    match result {
+        Ok(value) => {
+            panic!(
+                "assertion failure: Expected panic, got successful {:?}",
+                value
+            );
+        }
+        Err(error) => {
+            let message_str = error.downcast_ref::<&'static str>().cloned();
+            let message_string = error.downcast_ref::<String>().map(String::as_str);
+            if let Some(message) = message_str.or(message_string) {
+                if !message.contains(expected) {
+                    panic!(
+                        "assertion failure: Expected {:?}, but found panic with {:?}",
+                        expected, message
+                    );
+                }
+            // assertion passes
+            } else {
+                panic!(
+                    "assertion failure: Expected {:?}, but found panic with unknown value",
+                    expected
+                );
+            }
+        }
+    }
+}
diff --git a/src/iter/copied.rs b/src/iter/copied.rs
new file mode 100644
index 0000000..12c9c5b
--- /dev/null
+++ b/src/iter/copied.rs
@@ -0,0 +1,223 @@
+use super::plumbing::*;
+use super::*;
+
+use std::iter;
+
+/// `Copied` is an iterator that copies the elements of an underlying iterator.
+///
+/// This struct is created by the [`copied()`] method on [`ParallelIterator`]
+///
+/// [`copied()`]: trait.ParallelIterator.html#method.copied
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Copied<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Copied<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Copied` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Copied { base }
+    }
+}
+
+impl<'a, T, I> ParallelIterator for Copied<I>
+where
+    I: ParallelIterator<Item = &'a T>,
+    T: 'a + Copy + Send + Sync,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = CopiedConsumer::new(consumer);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<'a, T, I> IndexedParallelIterator for Copied<I>
+where
+    I: IndexedParallelIterator<Item = &'a T>,
+    T: 'a + Copy + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = CopiedConsumer::new(consumer);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<'a, T, CB> ProducerCallback<&'a T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+            T: 'a + Copy + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = &'a T>,
+            {
+                let producer = CopiedProducer { base };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct CopiedProducer<P> {
+    base: P,
+}
+
+impl<'a, T, P> Producer for CopiedProducer<P>
+where
+    P: Producer<Item = &'a T>,
+    T: 'a + Copy,
+{
+    type Item = T;
+    type IntoIter = iter::Copied<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().copied()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            CopiedProducer { base: left },
+            CopiedProducer { base: right },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(CopiedFolder { base: folder }).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct CopiedConsumer<C> {
+    base: C,
+}
+
+impl<C> CopiedConsumer<C> {
+    fn new(base: C) -> Self {
+        CopiedConsumer { base }
+    }
+}
+
+impl<'a, T, C> Consumer<&'a T> for CopiedConsumer<C>
+where
+    C: Consumer<T>,
+    T: 'a + Copy,
+{
+    type Folder = CopiedFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            CopiedConsumer::new(left),
+            CopiedConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        CopiedFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<&'a T> for CopiedConsumer<C>
+where
+    C: UnindexedConsumer<T>,
+    T: 'a + Copy,
+{
+    fn split_off_left(&self) -> Self {
+        CopiedConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct CopiedFolder<F> {
+    base: F,
+}
+
+impl<'a, T, F> Folder<&'a T> for CopiedFolder<F>
+where
+    F: Folder<T>,
+    T: 'a + Copy,
+{
+    type Result = F::Result;
+
+    fn consume(self, &item: &'a T) -> Self {
+        CopiedFolder {
+            base: self.base.consume(item),
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = &'a T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().copied());
+        self
+    }
+
+    fn complete(self) -> F::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/empty.rs b/src/iter/empty.rs
new file mode 100644
index 0000000..85a2e5f
--- /dev/null
+++ b/src/iter/empty.rs
@@ -0,0 +1,104 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+use std::fmt;
+use std::marker::PhantomData;
+
+/// Creates a parallel iterator that produces nothing.
+///
+/// This admits no parallelism on its own, but it could be used for code that
+/// deals with generic parallel iterators.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::empty;
+///
+/// let pi = (0..1234).into_par_iter()
+///     .chain(empty())
+///     .chain(1234..10_000);
+///
+/// assert_eq!(pi.count(), 10_000);
+/// ```
+pub fn empty<T: Send>() -> Empty<T> {
+    Empty {
+        marker: PhantomData,
+    }
+}
+
+/// Iterator adaptor for [the `empty()` function](fn.empty.html).
+pub struct Empty<T: Send> {
+    marker: PhantomData<T>,
+}
+
+impl<T: Send> Clone for Empty<T> {
+    fn clone(&self) -> Self {
+        empty()
+    }
+}
+
+impl<T: Send> fmt::Debug for Empty<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.pad("Empty")
+    }
+}
+
+impl<T: Send> ParallelIterator for Empty<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(0)
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for Empty<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        consumer.into_folder().complete()
+    }
+
+    fn len(&self) -> usize {
+        0
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(EmptyProducer(PhantomData))
+    }
+}
+
+/// Private empty producer
+struct EmptyProducer<T: Send>(PhantomData<T>);
+
+impl<T: Send> Producer for EmptyProducer<T> {
+    type Item = T;
+    type IntoIter = std::iter::Empty<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        std::iter::empty()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert_eq!(index, 0);
+        (self, EmptyProducer(PhantomData))
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder
+    }
+}
diff --git a/src/iter/enumerate.rs b/src/iter/enumerate.rs
new file mode 100644
index 0000000..980ee7c
--- /dev/null
+++ b/src/iter/enumerate.rs
@@ -0,0 +1,133 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+use std::ops::Range;
+use std::usize;
+
+/// `Enumerate` is an iterator that returns the current count along with the element.
+/// This struct is created by the [`enumerate()`] method on [`IndexedParallelIterator`]
+///
+/// [`enumerate()`]: trait.IndexedParallelIterator.html#method.enumerate
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Enumerate<I: IndexedParallelIterator> {
+    base: I,
+}
+
+impl<I> Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Enumerate` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Enumerate { base }
+    }
+}
+
+impl<I> ParallelIterator for Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = (usize, I::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Enumerate<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<I, CB> ProducerCallback<I> for Callback<CB>
+        where
+            CB: ProducerCallback<(usize, I)>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = I>,
+            {
+                let producer = EnumerateProducer { base, offset: 0 };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct EnumerateProducer<P> {
+    base: P,
+    offset: usize,
+}
+
+impl<P> Producer for EnumerateProducer<P>
+where
+    P: Producer,
+{
+    type Item = (usize, P::Item);
+    type IntoIter = iter::Zip<Range<usize>, P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        // Enumerate only works for IndexedParallelIterators. Since those
+        // have a max length of usize::MAX, their max index is
+        // usize::MAX - 1, so the range 0..usize::MAX includes all
+        // possible indices.
+        //
+        // However, we should to use a precise end to the range, otherwise
+        // reversing the iterator may have to walk back a long ways before
+        // `Zip::next_back` can produce anything.
+        let base = self.base.into_iter();
+        let end = self.offset + base.len();
+        (self.offset..end).zip(base)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            EnumerateProducer {
+                base: left,
+                offset: self.offset,
+            },
+            EnumerateProducer {
+                base: right,
+                offset: self.offset + index,
+            },
+        )
+    }
+}
diff --git a/src/iter/extend.rs b/src/iter/extend.rs
new file mode 100644
index 0000000..fb89249
--- /dev/null
+++ b/src/iter/extend.rs
@@ -0,0 +1,376 @@
+use super::noop::NoopConsumer;
+use super::{IntoParallelIterator, ParallelExtend, ParallelIterator};
+
+use std::borrow::Cow;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::hash::{BuildHasher, Hash};
+
+/// Performs a generic `par_extend` by collecting to a `LinkedList<Vec<_>>` in
+/// parallel, then extending the collection sequentially.
+fn extend<C, I, F>(collection: &mut C, par_iter: I, reserve: F)
+where
+    I: IntoParallelIterator,
+    F: FnOnce(&mut C, &LinkedList<Vec<I::Item>>),
+    C: Extend<I::Item>,
+{
+    let list = collect(par_iter);
+    reserve(collection, &list);
+    for vec in list {
+        collection.extend(vec);
+    }
+}
+
+pub(super) fn collect<I>(par_iter: I) -> LinkedList<Vec<I::Item>>
+where
+    I: IntoParallelIterator,
+{
+    par_iter
+        .into_par_iter()
+        .fold(Vec::new, vec_push)
+        .map(as_list)
+        .reduce(LinkedList::new, list_append)
+}
+
+fn vec_push<T>(mut vec: Vec<T>, elem: T) -> Vec<T> {
+    vec.push(elem);
+    vec
+}
+
+fn as_list<T>(item: T) -> LinkedList<T> {
+    let mut list = LinkedList::new();
+    list.push_back(item);
+    list
+}
+
+fn list_append<T>(mut list1: LinkedList<T>, mut list2: LinkedList<T>) -> LinkedList<T> {
+    list1.append(&mut list2);
+    list1
+}
+
+/// Computes the total length of a `LinkedList<Vec<_>>`.
+pub(super) fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
+    list.iter().map(Vec::len).sum()
+}
+
+fn no_reserve<C, T>(_: &mut C, _: &LinkedList<Vec<T>>) {}
+
+fn heap_reserve<T: Ord, U>(heap: &mut BinaryHeap<T>, list: &LinkedList<Vec<U>>) {
+    heap.reserve(len(list));
+}
+
+/// Extends a binary heap with items from a parallel iterator.
+impl<T> ParallelExtend<T> for BinaryHeap<T>
+where
+    T: Ord + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, heap_reserve);
+    }
+}
+
+/// Extends a binary heap with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for BinaryHeap<T>
+where
+    T: 'a + Copy + Ord + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, heap_reserve);
+    }
+}
+
+/// Extends a B-tree map with items from a parallel iterator.
+impl<K, V> ParallelExtend<(K, V)> for BTreeMap<K, V>
+where
+    K: Ord + Send,
+    V: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree map with copied items from a parallel iterator.
+impl<'a, K: 'a, V: 'a> ParallelExtend<(&'a K, &'a V)> for BTreeMap<K, V>
+where
+    K: Copy + Ord + Send + Sync,
+    V: Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (&'a K, &'a V)>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree set with items from a parallel iterator.
+impl<T> ParallelExtend<T> for BTreeSet<T>
+where
+    T: Ord + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+/// Extends a B-tree set with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for BTreeSet<T>
+where
+    T: 'a + Copy + Ord + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, no_reserve);
+    }
+}
+
+fn map_reserve<K, V, S, U>(map: &mut HashMap<K, V, S>, list: &LinkedList<Vec<U>>)
+where
+    K: Eq + Hash,
+    S: BuildHasher,
+{
+    map.reserve(len(list));
+}
+
+/// Extends a hash map with items from a parallel iterator.
+impl<K, V, S> ParallelExtend<(K, V)> for HashMap<K, V, S>
+where
+    K: Eq + Hash + Send,
+    V: Send,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        // See the map_collect benchmarks in rayon-demo for different strategies.
+        extend(self, par_iter, map_reserve);
+    }
+}
+
+/// Extends a hash map with copied items from a parallel iterator.
+impl<'a, K: 'a, V: 'a, S> ParallelExtend<(&'a K, &'a V)> for HashMap<K, V, S>
+where
+    K: Copy + Eq + Hash + Send + Sync,
+    V: Copy + Send + Sync,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = (&'a K, &'a V)>,
+    {
+        extend(self, par_iter, map_reserve);
+    }
+}
+
+fn set_reserve<T, S, U>(set: &mut HashSet<T, S>, list: &LinkedList<Vec<U>>)
+where
+    T: Eq + Hash,
+    S: BuildHasher,
+{
+    set.reserve(len(list));
+}
+
+/// Extends a hash set with items from a parallel iterator.
+impl<T, S> ParallelExtend<T> for HashSet<T, S>
+where
+    T: Eq + Hash + Send,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, set_reserve);
+    }
+}
+
+/// Extends a hash set with copied items from a parallel iterator.
+impl<'a, T, S> ParallelExtend<&'a T> for HashSet<T, S>
+where
+    T: 'a + Copy + Eq + Hash + Send + Sync,
+    S: BuildHasher + Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, set_reserve);
+    }
+}
+
+fn list_push_back<T>(mut list: LinkedList<T>, elem: T) -> LinkedList<T> {
+    list.push_back(elem);
+    list
+}
+
+/// Extends a linked list with items from a parallel iterator.
+impl<T> ParallelExtend<T> for LinkedList<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        let mut list = par_iter
+            .into_par_iter()
+            .fold(LinkedList::new, list_push_back)
+            .reduce(LinkedList::new, list_append);
+        self.append(&mut list);
+    }
+}
+
+/// Extends a linked list with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for LinkedList<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+fn string_push(mut string: String, ch: char) -> String {
+    string.push(ch);
+    string
+}
+
+/// Extends a string with characters from a parallel iterator.
+impl ParallelExtend<char> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = char>,
+    {
+        // This is like `extend`, but `Vec<char>` is less efficient to deal
+        // with than `String`, so instead collect to `LinkedList<String>`.
+        let list: LinkedList<_> = par_iter
+            .into_par_iter()
+            .fold(String::new, string_push)
+            .map(as_list)
+            .reduce(LinkedList::new, list_append);
+
+        self.reserve(list.iter().map(String::len).sum());
+        self.extend(list)
+    }
+}
+
+/// Extends a string with copied characters from a parallel iterator.
+impl<'a> ParallelExtend<&'a char> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a char>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+fn string_reserve<T: AsRef<str>>(string: &mut String, list: &LinkedList<Vec<T>>) {
+    let len = list.iter().flatten().map(T::as_ref).map(str::len).sum();
+    string.reserve(len);
+}
+
+/// Extends a string with string slices from a parallel iterator.
+impl<'a> ParallelExtend<&'a str> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a str>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+/// Extends a string with strings from a parallel iterator.
+impl ParallelExtend<String> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = String>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+/// Extends a string with string slices from a parallel iterator.
+impl<'a> ParallelExtend<Cow<'a, str>> for String {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = Cow<'a, str>>,
+    {
+        extend(self, par_iter, string_reserve);
+    }
+}
+
+fn deque_reserve<T, U>(deque: &mut VecDeque<T>, list: &LinkedList<Vec<U>>) {
+    deque.reserve(len(list));
+}
+
+/// Extends a deque with items from a parallel iterator.
+impl<T> ParallelExtend<T> for VecDeque<T>
+where
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        extend(self, par_iter, deque_reserve);
+    }
+}
+
+/// Extends a deque with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for VecDeque<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        extend(self, par_iter, deque_reserve);
+    }
+}
+
+// See the `collect` module for the `Vec<T>` implementation.
+// impl<T> ParallelExtend<T> for Vec<T>
+
+/// Extends a vector with copied items from a parallel iterator.
+impl<'a, T> ParallelExtend<&'a T> for Vec<T>
+where
+    T: 'a + Copy + Send + Sync,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = &'a T>,
+    {
+        self.par_extend(par_iter.into_par_iter().cloned())
+    }
+}
+
+/// Collapses all unit items from a parallel iterator into one.
+impl ParallelExtend<()> for () {
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = ()>,
+    {
+        par_iter.into_par_iter().drive_unindexed(NoopConsumer)
+    }
+}
diff --git a/src/iter/filter.rs b/src/iter/filter.rs
new file mode 100644
index 0000000..38627f7
--- /dev/null
+++ b/src/iter/filter.rs
@@ -0,0 +1,141 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Filter` takes a predicate `filter_op` and filters out elements that match.
+/// This struct is created by the [`filter()`] method on [`ParallelIterator`]
+///
+/// [`filter()`]: trait.ParallelIterator.html#method.filter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Filter<I: ParallelIterator, P> {
+    base: I,
+    filter_op: P,
+}
+
+impl<I: ParallelIterator + Debug, P> Debug for Filter<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Filter").field("base", &self.base).finish()
+    }
+}
+
+impl<I, P> Filter<I, P>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Filter` iterator.
+    pub(super) fn new(base: I, filter_op: P) -> Self {
+        Filter { base, filter_op }
+    }
+}
+
+impl<I, P> ParallelIterator for Filter<I, P>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FilterConsumer::new(consumer, &self.filter_op);
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FilterConsumer<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P> FilterConsumer<'p, C, P> {
+    fn new(base: C, filter_op: &'p P) -> Self {
+        FilterConsumer { base, filter_op }
+    }
+}
+
+impl<'p, T, C, P: 'p> Consumer<T> for FilterConsumer<'p, C, P>
+where
+    C: Consumer<T>,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FilterFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FilterConsumer::new(left, self.filter_op),
+            FilterConsumer::new(right, self.filter_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FilterFolder {
+            base: self.base.into_folder(),
+            filter_op: self.filter_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'p, T, C, P: 'p> UnindexedConsumer<T> for FilterConsumer<'p, C, P>
+where
+    C: UnindexedConsumer<T>,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        FilterConsumer::new(self.base.split_off_left(), &self.filter_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FilterFolder<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P, T> Folder<T> for FilterFolder<'p, C, P>
+where
+    C: Folder<T>,
+    P: Fn(&T) -> bool + 'p,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let filter_op = self.filter_op;
+        if filter_op(&item) {
+            let base = self.base.consume(item);
+            FilterFolder { base, filter_op }
+        } else {
+            self
+        }
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/filter_map.rs b/src/iter/filter_map.rs
new file mode 100644
index 0000000..f19c385
--- /dev/null
+++ b/src/iter/filter_map.rs
@@ -0,0 +1,142 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FilterMap` creates an iterator that uses `filter_op` to both filter and map elements.
+/// This struct is created by the [`filter_map()`] method on [`ParallelIterator`].
+///
+/// [`filter_map()`]: trait.ParallelIterator.html#method.filter_map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FilterMap<I: ParallelIterator, P> {
+    base: I,
+    filter_op: P,
+}
+
+impl<I: ParallelIterator + Debug, P> Debug for FilterMap<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FilterMap")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I: ParallelIterator, P> FilterMap<I, P> {
+    /// Creates a new `FilterMap` iterator.
+    pub(super) fn new(base: I, filter_op: P) -> Self {
+        FilterMap { base, filter_op }
+    }
+}
+
+impl<I, P, R> ParallelIterator for FilterMap<I, P>
+where
+    I: ParallelIterator,
+    P: Fn(I::Item) -> Option<R> + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FilterMapConsumer::new(consumer, &self.filter_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FilterMapConsumer<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, C, P: 'p> FilterMapConsumer<'p, C, P> {
+    fn new(base: C, filter_op: &'p P) -> Self {
+        FilterMapConsumer { base, filter_op }
+    }
+}
+
+impl<'p, T, U, C, P> Consumer<T> for FilterMapConsumer<'p, C, P>
+where
+    C: Consumer<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    type Folder = FilterMapFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FilterMapConsumer::new(left, self.filter_op),
+            FilterMapConsumer::new(right, self.filter_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        let base = self.base.into_folder();
+        FilterMapFolder {
+            base,
+            filter_op: self.filter_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'p, T, U, C, P> UnindexedConsumer<T> for FilterMapConsumer<'p, C, P>
+where
+    C: UnindexedConsumer<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    fn split_off_left(&self) -> Self {
+        FilterMapConsumer::new(self.base.split_off_left(), &self.filter_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FilterMapFolder<'p, C, P> {
+    base: C,
+    filter_op: &'p P,
+}
+
+impl<'p, T, U, C, P> Folder<T> for FilterMapFolder<'p, C, P>
+where
+    C: Folder<U>,
+    P: Fn(T) -> Option<U> + Sync + 'p,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let filter_op = self.filter_op;
+        if let Some(mapped_item) = filter_op(item) {
+            let base = self.base.consume(mapped_item);
+            FilterMapFolder { base, filter_op }
+        } else {
+            self
+        }
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/find.rs b/src/iter/find.rs
new file mode 100644
index 0000000..971db2b
--- /dev/null
+++ b/src/iter/find.rs
@@ -0,0 +1,120 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn find<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let found = AtomicBool::new(false);
+    let consumer = FindConsumer::new(&find_op, &found);
+    pi.drive_unindexed(consumer)
+}
+
+struct FindConsumer<'p, P> {
+    find_op: &'p P,
+    found: &'p AtomicBool,
+}
+
+impl<'p, P> FindConsumer<'p, P> {
+    fn new(find_op: &'p P, found: &'p AtomicBool) -> Self {
+        FindConsumer { find_op, found }
+    }
+}
+
+impl<'p, T, P: 'p> Consumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FindFolder<'p, T, P>;
+    type Reducer = FindReducer;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        (self.split_off_left(), self, FindReducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FindFolder {
+            find_op: self.find_op,
+            found: self.found,
+            item: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.found.load(Ordering::Relaxed)
+    }
+}
+
+impl<'p, T, P: 'p> UnindexedConsumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        FindConsumer::new(self.find_op, self.found)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        FindReducer
+    }
+}
+
+struct FindFolder<'p, T, P> {
+    find_op: &'p P,
+    found: &'p AtomicBool,
+    item: Option<T>,
+}
+
+impl<'p, T, P> Folder<T> for FindFolder<'p, T, P>
+where
+    P: Fn(&T) -> bool + 'p,
+{
+    type Result = Option<T>;
+
+    fn consume(mut self, item: T) -> Self {
+        if (self.find_op)(&item) {
+            self.found.store(true, Ordering::Relaxed);
+            self.item = Some(item);
+        }
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn not_full<T>(found: &AtomicBool) -> impl Fn(&T) -> bool + '_ {
+            move |_| !found.load(Ordering::Relaxed)
+        }
+
+        self.item = iter
+            .into_iter()
+            // stop iterating if another thread has found something
+            .take_while(not_full(&self.found))
+            .find(self.find_op);
+        if self.item.is_some() {
+            self.found.store(true, Ordering::Relaxed)
+        }
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        self.found.load(Ordering::Relaxed)
+    }
+}
+
+struct FindReducer;
+
+impl<T> Reducer<Option<T>> for FindReducer {
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        left.or(right)
+    }
+}
diff --git a/src/iter/find_first_last/mod.rs b/src/iter/find_first_last/mod.rs
new file mode 100644
index 0000000..e5da8f0
--- /dev/null
+++ b/src/iter/find_first_last/mod.rs
@@ -0,0 +1,238 @@
+use super::plumbing::*;
+use super::*;
+use std::cell::Cell;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+#[cfg(test)]
+mod test;
+
+// The key optimization for find_first is that a consumer can stop its search if
+// some consumer to its left already found a match (and similarly for consumers
+// to the right for find_last). To make this work, all consumers need some
+// notion of their position in the data relative to other consumers, including
+// unindexed consumers that have no built-in notion of position.
+//
+// To solve this, we assign each consumer a lower and upper bound for an
+// imaginary "range" of data that it consumes. The initial consumer starts with
+// the range 0..usize::max_value(). The split divides this range in half so that
+// one resulting consumer has the range 0..(usize::max_value() / 2), and the
+// other has (usize::max_value() / 2)..usize::max_value(). Every subsequent
+// split divides the range in half again until it cannot be split anymore
+// (i.e. its length is 1), in which case the split returns two consumers with
+// the same range. In that case both consumers will continue to consume all
+// their data regardless of whether a better match is found, but the reducer
+// will still return the correct answer.
+
+#[derive(Copy, Clone)]
+enum MatchPosition {
+    Leftmost,
+    Rightmost,
+}
+
+/// Returns true if pos1 is a better match than pos2 according to MatchPosition
+#[inline]
+fn better_position(pos1: usize, pos2: usize, mp: MatchPosition) -> bool {
+    match mp {
+        MatchPosition::Leftmost => pos1 < pos2,
+        MatchPosition::Rightmost => pos1 > pos2,
+    }
+}
+
+pub(super) fn find_first<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let best_found = AtomicUsize::new(usize::max_value());
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &best_found);
+    pi.drive_unindexed(consumer)
+}
+
+pub(super) fn find_last<I, P>(pi: I, find_op: P) -> Option<I::Item>
+where
+    I: ParallelIterator,
+    P: Fn(&I::Item) -> bool + Sync,
+{
+    let best_found = AtomicUsize::new(0);
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &best_found);
+    pi.drive_unindexed(consumer)
+}
+
+struct FindConsumer<'p, P> {
+    find_op: &'p P,
+    lower_bound: Cell<usize>,
+    upper_bound: usize,
+    match_position: MatchPosition,
+    best_found: &'p AtomicUsize,
+}
+
+impl<'p, P> FindConsumer<'p, P> {
+    fn new(find_op: &'p P, match_position: MatchPosition, best_found: &'p AtomicUsize) -> Self {
+        FindConsumer {
+            find_op,
+            lower_bound: Cell::new(0),
+            upper_bound: usize::max_value(),
+            match_position,
+            best_found,
+        }
+    }
+
+    fn current_index(&self) -> usize {
+        match self.match_position {
+            MatchPosition::Leftmost => self.lower_bound.get(),
+            MatchPosition::Rightmost => self.upper_bound,
+        }
+    }
+}
+
+impl<'p, T, P> Consumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    type Folder = FindFolder<'p, T, P>;
+    type Reducer = FindReducer;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+        let dir = self.match_position;
+        (
+            self.split_off_left(),
+            self,
+            FindReducer {
+                match_position: dir,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FindFolder {
+            find_op: self.find_op,
+            boundary: self.current_index(),
+            match_position: self.match_position,
+            best_found: self.best_found,
+            item: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        // can stop consuming if the best found index so far is *strictly*
+        // better than anything this consumer will find
+        better_position(
+            self.best_found.load(Ordering::Relaxed),
+            self.current_index(),
+            self.match_position,
+        )
+    }
+}
+
+impl<'p, T, P> UnindexedConsumer<T> for FindConsumer<'p, P>
+where
+    T: Send,
+    P: Fn(&T) -> bool + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        // Upper bound for one consumer will be lower bound for the other. This
+        // overlap is okay, because only one of the bounds will be used for
+        // comparing against best_found; the other is kept only to be able to
+        // divide the range in half.
+        //
+        // When the resolution of usize has been exhausted (i.e. when
+        // upper_bound = lower_bound), both results of this split will have the
+        // same range. When that happens, we lose the ability to tell one
+        // consumer to stop working when the other finds a better match, but the
+        // reducer ensures that the best answer is still returned (see the test
+        // above).
+        let old_lower_bound = self.lower_bound.get();
+        let median = old_lower_bound + ((self.upper_bound - old_lower_bound) / 2);
+        self.lower_bound.set(median);
+
+        FindConsumer {
+            find_op: self.find_op,
+            lower_bound: Cell::new(old_lower_bound),
+            upper_bound: median,
+            match_position: self.match_position,
+            best_found: self.best_found,
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        FindReducer {
+            match_position: self.match_position,
+        }
+    }
+}
+
+struct FindFolder<'p, T, P> {
+    find_op: &'p P,
+    boundary: usize,
+    match_position: MatchPosition,
+    best_found: &'p AtomicUsize,
+    item: Option<T>,
+}
+
+impl<'p, P: 'p + Fn(&T) -> bool, T> Folder<T> for FindFolder<'p, T, P> {
+    type Result = Option<T>;
+
+    fn consume(mut self, item: T) -> Self {
+        let found_best_in_range = match self.match_position {
+            MatchPosition::Leftmost => self.item.is_some(),
+            MatchPosition::Rightmost => false,
+        };
+
+        if !found_best_in_range && (self.find_op)(&item) {
+            // Continuously try to set best_found until we succeed or we
+            // discover a better match was already found.
+            let mut current = self.best_found.load(Ordering::Relaxed);
+            loop {
+                if better_position(current, self.boundary, self.match_position) {
+                    break;
+                }
+                match self.best_found.compare_exchange_weak(
+                    current,
+                    self.boundary,
+                    Ordering::Relaxed,
+                    Ordering::Relaxed,
+                ) {
+                    Ok(_) => {
+                        self.item = Some(item);
+                        break;
+                    }
+                    Err(v) => current = v,
+                }
+            }
+        }
+        self
+    }
+
+    fn complete(self) -> Self::Result {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        let found_best_in_range = match self.match_position {
+            MatchPosition::Leftmost => self.item.is_some(),
+            MatchPosition::Rightmost => false,
+        };
+
+        found_best_in_range
+            || better_position(
+                self.best_found.load(Ordering::Relaxed),
+                self.boundary,
+                self.match_position,
+            )
+    }
+}
+
+struct FindReducer {
+    match_position: MatchPosition,
+}
+
+impl<T> Reducer<Option<T>> for FindReducer {
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        match self.match_position {
+            MatchPosition::Leftmost => left.or(right),
+            MatchPosition::Rightmost => right.or(left),
+        }
+    }
+}
diff --git a/src/iter/find_first_last/test.rs b/src/iter/find_first_last/test.rs
new file mode 100644
index 0000000..05271bc
--- /dev/null
+++ b/src/iter/find_first_last/test.rs
@@ -0,0 +1,106 @@
+use super::*;
+use std::sync::atomic::AtomicUsize;
+
+#[test]
+fn same_range_first_consumers_return_correct_answer() {
+    let find_op = |x: &i32| x % 2 == 0;
+    let first_found = AtomicUsize::new(usize::max_value());
+    let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found);
+
+    // We save a consumer that will be far to the right of the main consumer (and therefore not
+    // sharing an index range with that consumer) for fullness testing
+    let consumer = far_right_consumer.split_off_left();
+
+    // split until we have an indivisible range
+    let bits_in_usize = usize::min_value().count_zeros();
+
+    for _ in 0..bits_in_usize {
+        consumer.split_off_left();
+    }
+
+    let reducer = consumer.to_reducer();
+    // the left and right folders should now have the same range, having
+    // exhausted the resolution of usize
+    let left_folder = consumer.split_off_left().into_folder();
+    let right_folder = consumer.into_folder();
+
+    let left_folder = left_folder.consume(0).consume(1);
+    assert_eq!(left_folder.boundary, right_folder.boundary);
+    // expect not full even though a better match has been found because the
+    // ranges are the same
+    assert!(!right_folder.full());
+    assert!(far_right_consumer.full());
+    let right_folder = right_folder.consume(2).consume(3);
+    assert_eq!(
+        reducer.reduce(left_folder.complete(), right_folder.complete()),
+        Some(0)
+    );
+}
+
+#[test]
+fn same_range_last_consumers_return_correct_answer() {
+    let find_op = |x: &i32| x % 2 == 0;
+    let last_found = AtomicUsize::new(0);
+    let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found);
+
+    // We save a consumer that will be far to the left of the main consumer (and therefore not
+    // sharing an index range with that consumer) for fullness testing
+    let far_left_consumer = consumer.split_off_left();
+
+    // split until we have an indivisible range
+    let bits_in_usize = usize::min_value().count_zeros();
+    for _ in 0..bits_in_usize {
+        consumer.split_off_left();
+    }
+
+    let reducer = consumer.to_reducer();
+    // due to the exact calculation in split_off_left, the very last consumer has a
+    // range of width 2, so we use the second-to-last consumer instead to get
+    // the same boundary on both folders
+    let consumer = consumer.split_off_left();
+    let left_folder = consumer.split_off_left().into_folder();
+    let right_folder = consumer.into_folder();
+    let right_folder = right_folder.consume(2).consume(3);
+    assert_eq!(left_folder.boundary, right_folder.boundary);
+    // expect not full even though a better match has been found because the
+    // ranges are the same
+    assert!(!left_folder.full());
+    assert!(far_left_consumer.full());
+    let left_folder = left_folder.consume(0).consume(1);
+    assert_eq!(
+        reducer.reduce(left_folder.complete(), right_folder.complete()),
+        Some(2)
+    );
+}
+
+// These tests requires that a folder be assigned to an iterator with more than
+// one element. We can't necessarily determine when that will happen for a given
+// input to find_first/find_last, so we test the folder directly here instead.
+#[test]
+fn find_first_folder_does_not_clobber_first_found() {
+    let best_found = AtomicUsize::new(usize::max_value());
+    let f = FindFolder {
+        find_op: &(|&_: &i32| -> bool { true }),
+        boundary: 0,
+        match_position: MatchPosition::Leftmost,
+        best_found: &best_found,
+        item: None,
+    };
+    let f = f.consume(0_i32).consume(1_i32).consume(2_i32);
+    assert!(f.full());
+    assert_eq!(f.complete(), Some(0_i32));
+}
+
+#[test]
+fn find_last_folder_yields_last_match() {
+    let best_found = AtomicUsize::new(0);
+    let f = FindFolder {
+        find_op: &(|&_: &i32| -> bool { true }),
+        boundary: 0,
+        match_position: MatchPosition::Rightmost,
+        best_found: &best_found,
+        item: None,
+    };
+    let f = f.consume(0_i32).consume(1_i32).consume(2_i32);
+    assert_eq!(f.complete(), Some(2_i32));
+}
diff --git a/src/iter/flat_map.rs b/src/iter/flat_map.rs
new file mode 100644
index 0000000..f264e1e
--- /dev/null
+++ b/src/iter/flat_map.rs
@@ -0,0 +1,154 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FlatMap` maps each element to a parallel iterator, then flattens these iterators together.
+/// This struct is created by the [`flat_map()`] method on [`ParallelIterator`]
+///
+/// [`flat_map()`]: trait.ParallelIterator.html#method.flat_map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FlatMap<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for FlatMap<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlatMap").field("base", &self.base).finish()
+    }
+}
+
+impl<I: ParallelIterator, F> FlatMap<I, F> {
+    /// Creates a new `FlatMap` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        FlatMap { base, map_op }
+    }
+}
+
+impl<I, F, PI> ParallelIterator for FlatMap<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> PI + Sync + Send,
+    PI: IntoParallelIterator,
+{
+    type Item = PI::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlatMapConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlatMapConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> FlatMapConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        FlatMapConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, U, C, F> Consumer<T> for FlatMapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    type Folder = FlatMapFolder<'f, C, F, C::Result>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlatMapConsumer::new(left, self.map_op),
+            FlatMapConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlatMapFolder {
+            base: self.base,
+            map_op: self.map_op,
+            previous: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, C, F> UnindexedConsumer<T> for FlatMapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlatMapConsumer::new(self.base.split_off_left(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlatMapFolder<'f, C, F, R> {
+    base: C,
+    map_op: &'f F,
+    previous: Option<R>,
+}
+
+impl<'f, T, U, C, F> Folder<T> for FlatMapFolder<'f, C, F, C::Result>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoParallelIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let map_op = self.map_op;
+        let par_iter = map_op(item).into_par_iter();
+        let consumer = self.base.split_off_left();
+        let result = par_iter.drive_unindexed(consumer);
+
+        let previous = match self.previous {
+            None => Some(result),
+            Some(previous) => {
+                let reducer = self.base.to_reducer();
+                Some(reducer.reduce(previous, result))
+            }
+        };
+
+        FlatMapFolder {
+            base: self.base,
+            map_op,
+            previous,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        match self.previous {
+            Some(previous) => previous,
+            None => self.base.into_folder().complete(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/flat_map_iter.rs b/src/iter/flat_map_iter.rs
new file mode 100644
index 0000000..c76cf68
--- /dev/null
+++ b/src/iter/flat_map_iter.rs
@@ -0,0 +1,147 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `FlatMapIter` maps each element to a serial iterator, then flattens these iterators together.
+/// This struct is created by the [`flat_map_iter()`] method on [`ParallelIterator`]
+///
+/// [`flat_map_iter()`]: trait.ParallelIterator.html#method.flat_map_iter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FlatMapIter<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for FlatMapIter<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FlatMapIter")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I: ParallelIterator, F> FlatMapIter<I, F> {
+    /// Creates a new `FlatMapIter` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        FlatMapIter { base, map_op }
+    }
+}
+
+impl<I, F, SI> ParallelIterator for FlatMapIter<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> SI + Sync + Send,
+    SI: IntoIterator,
+    SI::Item: Send,
+{
+    type Item = SI::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlatMapIterConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlatMapIterConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> FlatMapIterConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        FlatMapIterConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, U, C, F> Consumer<T> for FlatMapIterConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoIterator,
+{
+    type Folder = FlatMapIterFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlatMapIterConsumer::new(left, self.map_op),
+            FlatMapIterConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlatMapIterFolder {
+            base: self.base.into_folder(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, C, F> UnindexedConsumer<T> for FlatMapIterConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<U::Item>,
+    F: Fn(T) -> U + Sync,
+    U: IntoIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlatMapIterConsumer::new(self.base.split_off_left(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlatMapIterFolder<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, T, U, C, F> Folder<T> for FlatMapIterFolder<'f, C, F>
+where
+    C: Folder<U::Item>,
+    F: Fn(T) -> U,
+    U: IntoIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let map_op = self.map_op;
+        let base = self.base.consume_iter(map_op(item));
+        FlatMapIterFolder { base, map_op }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let map_op = self.map_op;
+        let iter = iter.into_iter().flat_map(map_op);
+        let base = self.base.consume_iter(iter);
+        FlatMapIterFolder { base, map_op }
+    }
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/flatten.rs b/src/iter/flatten.rs
new file mode 100644
index 0000000..29d88f9
--- /dev/null
+++ b/src/iter/flatten.rs
@@ -0,0 +1,140 @@
+use super::plumbing::*;
+use super::*;
+
+/// `Flatten` turns each element to a parallel iterator, then flattens these iterators
+/// together. This struct is created by the [`flatten()`] method on [`ParallelIterator`].
+///
+/// [`flatten()`]: trait.ParallelIterator.html#method.flatten
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Flatten<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> Flatten<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoParallelIterator,
+{
+    /// Creates a new `Flatten` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Flatten { base }
+    }
+}
+
+impl<I> ParallelIterator for Flatten<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoParallelIterator,
+{
+    type Item = <I::Item as IntoParallelIterator>::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlattenConsumer::new(consumer);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlattenConsumer<C> {
+    base: C,
+}
+
+impl<C> FlattenConsumer<C> {
+    fn new(base: C) -> Self {
+        FlattenConsumer { base }
+    }
+}
+
+impl<T, C> Consumer<T> for FlattenConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    type Folder = FlattenFolder<C, C::Result>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlattenConsumer::new(left),
+            FlattenConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlattenFolder {
+            base: self.base,
+            previous: None,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<T, C> UnindexedConsumer<T> for FlattenConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlattenConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlattenFolder<C, R> {
+    base: C,
+    previous: Option<R>,
+}
+
+impl<T, C> Folder<T> for FlattenFolder<C, C::Result>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoParallelIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let par_iter = item.into_par_iter();
+        let consumer = self.base.split_off_left();
+        let result = par_iter.drive_unindexed(consumer);
+
+        let previous = match self.previous {
+            None => Some(result),
+            Some(previous) => {
+                let reducer = self.base.to_reducer();
+                Some(reducer.reduce(previous, result))
+            }
+        };
+
+        FlattenFolder {
+            base: self.base,
+            previous,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        match self.previous {
+            Some(previous) => previous,
+            None => self.base.into_folder().complete(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/flatten_iter.rs b/src/iter/flatten_iter.rs
new file mode 100644
index 0000000..3ce0a3c
--- /dev/null
+++ b/src/iter/flatten_iter.rs
@@ -0,0 +1,132 @@
+use super::plumbing::*;
+use super::*;
+
+/// `FlattenIter` turns each element to a serial iterator, then flattens these iterators
+/// together. This struct is created by the [`flatten_iter()`] method on [`ParallelIterator`].
+///
+/// [`flatten_iter()`]: trait.ParallelIterator.html#method.flatten_iter
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct FlattenIter<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> FlattenIter<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoIterator,
+    <I::Item as IntoIterator>::Item: Send,
+{
+    /// Creates a new `FlattenIter` iterator.
+    pub(super) fn new(base: I) -> Self {
+        FlattenIter { base }
+    }
+}
+
+impl<I> ParallelIterator for FlattenIter<I>
+where
+    I: ParallelIterator,
+    I::Item: IntoIterator,
+    <I::Item as IntoIterator>::Item: Send,
+{
+    type Item = <I::Item as IntoIterator>::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer = FlattenIterConsumer::new(consumer);
+        self.base.drive_unindexed(consumer)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct FlattenIterConsumer<C> {
+    base: C,
+}
+
+impl<C> FlattenIterConsumer<C> {
+    fn new(base: C) -> Self {
+        FlattenIterConsumer { base }
+    }
+}
+
+impl<T, C> Consumer<T> for FlattenIterConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoIterator,
+{
+    type Folder = FlattenIterFolder<C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FlattenIterConsumer::new(left),
+            FlattenIterConsumer::new(right),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FlattenIterFolder {
+            base: self.base.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<T, C> UnindexedConsumer<T> for FlattenIterConsumer<C>
+where
+    C: UnindexedConsumer<T::Item>,
+    T: IntoIterator,
+{
+    fn split_off_left(&self) -> Self {
+        FlattenIterConsumer::new(self.base.split_off_left())
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FlattenIterFolder<C> {
+    base: C,
+}
+
+impl<T, C> Folder<T> for FlattenIterFolder<C>
+where
+    C: Folder<T::Item>,
+    T: IntoIterator,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let base = self.base.consume_iter(item);
+        FlattenIterFolder { base }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let iter = iter.into_iter().flatten();
+        let base = self.base.consume_iter(iter);
+        FlattenIterFolder { base }
+    }
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/fold.rs b/src/iter/fold.rs
new file mode 100644
index 0000000..345afbd
--- /dev/null
+++ b/src/iter/fold.rs
@@ -0,0 +1,302 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+impl<U, I, ID, F> Fold<I, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U + Sync + Send,
+    U: Send,
+{
+    pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
+        Fold {
+            base,
+            identity,
+            fold_op,
+        }
+    }
+}
+
+/// `Fold` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`fold()`] method on [`ParallelIterator`]
+///
+/// [`fold()`]: trait.ParallelIterator.html#method.fold
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Fold<I, ID, F> {
+    base: I,
+    identity: ID,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, ID, F> Debug for Fold<I, ID, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Fold").field("base", &self.base).finish()
+    }
+}
+
+impl<U, I, ID, F> ParallelIterator for Fold<I, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U + Sync + Send,
+    U: Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FoldConsumer {
+            base: consumer,
+            fold_op: &self.fold_op,
+            identity: &self.identity,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct FoldConsumer<'c, C, ID, F> {
+    base: C,
+    fold_op: &'c F,
+    identity: &'c ID,
+}
+
+impl<'r, U, T, C, ID, F> Consumer<T> for FoldConsumer<'r, C, ID, F>
+where
+    C: Consumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    ID: Fn() -> U + Sync,
+    U: Send,
+{
+    type Folder = FoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FoldConsumer { base: left, ..self },
+            FoldConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FoldFolder {
+            base: self.base.into_folder(),
+            item: (self.identity)(),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for FoldConsumer<'r, C, ID, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    ID: Fn() -> U + Sync,
+    U: Send,
+{
+    fn split_off_left(&self) -> Self {
+        FoldConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct FoldFolder<'r, C, ID, F> {
+    base: C,
+    fold_op: &'r F,
+    item: ID,
+}
+
+impl<'r, C, ID, F, T> Folder<T> for FoldFolder<'r, C, ID, F>
+where
+    C: Folder<ID>,
+    F: Fn(ID, T) -> ID + Sync,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let item = (self.fold_op)(self.item, item);
+        FoldFolder {
+            base: self.base,
+            fold_op: self.fold_op,
+            item,
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn not_full<C, ID, T>(base: &C) -> impl Fn(&T) -> bool + '_
+        where
+            C: Folder<ID>,
+        {
+            move |_| !base.full()
+        }
+
+        let base = self.base;
+        let item = iter
+            .into_iter()
+            // stop iterating if another thread has finished
+            .take_while(not_full(&base))
+            .fold(self.item, self.fold_op);
+
+        FoldFolder {
+            base,
+            item,
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.consume(self.item).complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+// ///////////////////////////////////////////////////////////////////////////
+
+impl<U, I, F> FoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    U: Send + Clone,
+{
+    pub(super) fn new(base: I, item: U, fold_op: F) -> Self {
+        FoldWith {
+            base,
+            item,
+            fold_op,
+        }
+    }
+}
+
+/// `FoldWith` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`fold_with()`] method on [`ParallelIterator`]
+///
+/// [`fold_with()`]: trait.ParallelIterator.html#method.fold_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldWith<I, U, F> {
+    base: I,
+    item: U,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, U: Debug, F> Debug for FoldWith<I, U, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FoldWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<U, I, F> ParallelIterator for FoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U, I::Item) -> U + Sync + Send,
+    U: Send + Clone,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = FoldWithConsumer {
+            base: consumer,
+            item: self.item,
+            fold_op: &self.fold_op,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct FoldWithConsumer<'c, C, U, F> {
+    base: C,
+    item: U,
+    fold_op: &'c F,
+}
+
+impl<'r, U, T, C, F> Consumer<T> for FoldWithConsumer<'r, C, U, F>
+where
+    C: Consumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    U: Send + Clone,
+{
+    type Folder = FoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            FoldWithConsumer {
+                base: left,
+                item: self.item.clone(),
+                ..self
+            },
+            FoldWithConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        FoldFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, F> UnindexedConsumer<T> for FoldWithConsumer<'r, C, U, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U, T) -> U + Sync,
+    U: Send + Clone,
+{
+    fn split_off_left(&self) -> Self {
+        FoldWithConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
diff --git a/src/iter/for_each.rs b/src/iter/for_each.rs
new file mode 100644
index 0000000..3b77beb
--- /dev/null
+++ b/src/iter/for_each.rs
@@ -0,0 +1,77 @@
+use super::noop::*;
+use super::plumbing::*;
+use super::ParallelIterator;
+
+pub(super) fn for_each<I, F, T>(pi: I, op: &F)
+where
+    I: ParallelIterator<Item = T>,
+    F: Fn(T) + Sync,
+    T: Send,
+{
+    let consumer = ForEachConsumer { op };
+    pi.drive_unindexed(consumer)
+}
+
+struct ForEachConsumer<'f, F> {
+    op: &'f F,
+}
+
+impl<'f, F, T> Consumer<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    type Folder = ForEachConsumer<'f, F>;
+    type Reducer = NoopReducer;
+    type Result = ();
+
+    fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) {
+        (self.split_off_left(), self, NoopReducer)
+    }
+
+    fn into_folder(self) -> Self {
+        self
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'f, F, T> Folder<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    type Result = ();
+
+    fn consume(self, item: T) -> Self {
+        (self.op)(item);
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        iter.into_iter().for_each(self.op);
+        self
+    }
+
+    fn complete(self) {}
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'f, F, T> UnindexedConsumer<T> for ForEachConsumer<'f, F>
+where
+    F: Fn(T) + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        ForEachConsumer { op: self.op }
+    }
+
+    fn to_reducer(&self) -> NoopReducer {
+        NoopReducer
+    }
+}
diff --git a/src/iter/from_par_iter.rs b/src/iter/from_par_iter.rs
new file mode 100644
index 0000000..3240f32
--- /dev/null
+++ b/src/iter/from_par_iter.rs
@@ -0,0 +1,228 @@
+use super::noop::NoopConsumer;
+use super::{FromParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
+
+use std::borrow::Cow;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::hash::{BuildHasher, Hash};
+
+/// Creates an empty default collection and extends it.
+fn collect_extended<C, I>(par_iter: I) -> C
+where
+    I: IntoParallelIterator,
+    C: ParallelExtend<I::Item> + Default,
+{
+    let mut collection = C::default();
+    collection.par_extend(par_iter);
+    collection
+}
+
+/// Collects items from a parallel iterator into a vector.
+impl<T> FromParallelIterator<T> for Vec<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects items from a parallel iterator into a vecdeque.
+impl<T> FromParallelIterator<T> for VecDeque<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Vec::from_par_iter(par_iter).into()
+    }
+}
+
+/// Collects items from a parallel iterator into a binaryheap.
+/// The heap-ordering is calculated serially after all items are collected.
+impl<T> FromParallelIterator<T> for BinaryHeap<T>
+where
+    T: Ord + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Vec::from_par_iter(par_iter).into()
+    }
+}
+
+/// Collects items from a parallel iterator into a freshly allocated
+/// linked list.
+impl<T> FromParallelIterator<T> for LinkedList<T>
+where
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects (key, value) pairs from a parallel iterator into a
+/// hashmap. If multiple pairs correspond to the same key, then the
+/// ones produced earlier in the parallel iterator will be
+/// overwritten, just as with a sequential iterator.
+impl<K, V, S> FromParallelIterator<(K, V)> for HashMap<K, V, S>
+where
+    K: Eq + Hash + Send,
+    V: Send,
+    S: BuildHasher + Default + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects (key, value) pairs from a parallel iterator into a
+/// btreemap. If multiple pairs correspond to the same key, then the
+/// ones produced earlier in the parallel iterator will be
+/// overwritten, just as with a sequential iterator.
+impl<K, V> FromParallelIterator<(K, V)> for BTreeMap<K, V>
+where
+    K: Ord + Send,
+    V: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = (K, V)>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects values from a parallel iterator into a hashset.
+impl<V, S> FromParallelIterator<V> for HashSet<V, S>
+where
+    V: Eq + Hash + Send,
+    S: BuildHasher + Default + Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = V>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects values from a parallel iterator into a btreeset.
+impl<V> FromParallelIterator<V> for BTreeSet<V>
+where
+    V: Send + Ord,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = V>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects characters from a parallel iterator into a string.
+impl FromParallelIterator<char> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = char>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects characters from a parallel iterator into a string.
+impl<'a> FromParallelIterator<&'a char> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = &'a char>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects string slices from a parallel iterator into a string.
+impl<'a> FromParallelIterator<&'a str> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = &'a str>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects strings from a parallel iterator into one large string.
+impl FromParallelIterator<String> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = String>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects string slices from a parallel iterator into a string.
+impl<'a> FromParallelIterator<Cow<'a, str>> for String {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Cow<'a, str>>,
+    {
+        collect_extended(par_iter)
+    }
+}
+
+/// Collects an arbitrary `Cow` collection.
+///
+/// Note, the standard library only has `FromIterator` for `Cow<'a, str>` and
+/// `Cow<'a, [T]>`, because no one thought to add a blanket implementation
+/// before it was stabilized.
+impl<'a, C: ?Sized, T> FromParallelIterator<T> for Cow<'a, C>
+where
+    C: ToOwned,
+    C::Owned: FromParallelIterator<T>,
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        Cow::Owned(C::Owned::from_par_iter(par_iter))
+    }
+}
+
+/// Collapses all unit items from a parallel iterator into one.
+///
+/// This is more useful when combined with higher-level abstractions, like
+/// collecting to a `Result<(), E>` where you only care about errors:
+///
+/// ```
+/// use std::io::*;
+/// use rayon::prelude::*;
+///
+/// let data = vec![1, 2, 3, 4, 5];
+/// let res: Result<()> = data.par_iter()
+///     .map(|x| writeln!(stdout(), "{}", x))
+///     .collect();
+/// assert!(res.is_ok());
+/// ```
+impl FromParallelIterator<()> for () {
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = ()>,
+    {
+        par_iter.into_par_iter().drive_unindexed(NoopConsumer)
+    }
+}
diff --git a/src/iter/inspect.rs b/src/iter/inspect.rs
new file mode 100644
index 0000000..9b1cd09
--- /dev/null
+++ b/src/iter/inspect.rs
@@ -0,0 +1,257 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+use std::iter;
+
+/// `Inspect` is an iterator that calls a function with a reference to each
+/// element before yielding it.
+///
+/// This struct is created by the [`inspect()`] method on [`ParallelIterator`]
+///
+/// [`inspect()`]: trait.ParallelIterator.html#method.inspect
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Inspect<I: ParallelIterator, F> {
+    base: I,
+    inspect_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Inspect<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Inspect").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Inspect<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Inspect` iterator.
+    pub(super) fn new(base: I, inspect_op: F) -> Self {
+        Inspect { base, inspect_op }
+    }
+}
+
+impl<I, F> ParallelIterator for Inspect<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(&I::Item) + Sync + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = InspectConsumer::new(consumer, &self.inspect_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F> IndexedParallelIterator for Inspect<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(&I::Item) + Sync + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = InspectConsumer::new(consumer, &self.inspect_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            inspect_op: self.inspect_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            inspect_op: F,
+        }
+
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<T>,
+            F: Fn(&T) + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = InspectProducer {
+                    base,
+                    inspect_op: &self.inspect_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct InspectProducer<'f, P, F> {
+    base: P,
+    inspect_op: &'f F,
+}
+
+impl<'f, P, F> Producer for InspectProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(&P::Item) + Sync,
+{
+    type Item = P::Item;
+    type IntoIter = iter::Inspect<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().inspect(self.inspect_op)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            InspectProducer {
+                base: left,
+                inspect_op: self.inspect_op,
+            },
+            InspectProducer {
+                base: right,
+                inspect_op: self.inspect_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = InspectFolder {
+            base: folder,
+            inspect_op: self.inspect_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct InspectConsumer<'f, C, F> {
+    base: C,
+    inspect_op: &'f F,
+}
+
+impl<'f, C, F> InspectConsumer<'f, C, F> {
+    fn new(base: C, inspect_op: &'f F) -> Self {
+        InspectConsumer { base, inspect_op }
+    }
+}
+
+impl<'f, T, C, F> Consumer<T> for InspectConsumer<'f, C, F>
+where
+    C: Consumer<T>,
+    F: Fn(&T) + Sync,
+{
+    type Folder = InspectFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            InspectConsumer::new(left, self.inspect_op),
+            InspectConsumer::new(right, self.inspect_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        InspectFolder {
+            base: self.base.into_folder(),
+            inspect_op: self.inspect_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, C, F> UnindexedConsumer<T> for InspectConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<T>,
+    F: Fn(&T) + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        InspectConsumer::new(self.base.split_off_left(), &self.inspect_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct InspectFolder<'f, C, F> {
+    base: C,
+    inspect_op: &'f F,
+}
+
+impl<'f, T, C, F> Folder<T> for InspectFolder<'f, C, F>
+where
+    C: Folder<T>,
+    F: Fn(&T),
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        (self.inspect_op)(&item);
+        InspectFolder {
+            base: self.base.consume(item),
+            inspect_op: self.inspect_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.base = self
+            .base
+            .consume_iter(iter.into_iter().inspect(self.inspect_op));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/interleave.rs b/src/iter/interleave.rs
new file mode 100644
index 0000000..b5d43d5
--- /dev/null
+++ b/src/iter/interleave.rs
@@ -0,0 +1,336 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+use std::iter::Fuse;
+
+/// `Interleave` is an iterator that interleaves elements of iterators
+/// `i` and `j` in one continuous iterator. This struct is created by
+/// the [`interleave()`] method on [`IndexedParallelIterator`]
+///
+/// [`interleave()`]: trait.IndexedParallelIterator.html#method.interleave
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    i: I,
+    j: J,
+}
+
+impl<I, J> Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    /// Creates a new `Interleave` iterator
+    pub(super) fn new(i: I, j: J) -> Self {
+        Interleave { i, j }
+    }
+}
+
+impl<I, J> ParallelIterator for Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<I::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, J> IndexedParallelIterator for Interleave<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.i.len().checked_add(self.j.len()).expect("overflow")
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let (i_len, j_len) = (self.i.len(), self.j.len());
+        return self.i.with_producer(CallbackI {
+            callback,
+            i_len,
+            j_len,
+            i_next: false,
+            j: self.j,
+        });
+
+        struct CallbackI<CB, J> {
+            callback: CB,
+            i_len: usize,
+            j_len: usize,
+            i_next: bool,
+            j: J,
+        }
+
+        impl<CB, J> ProducerCallback<J::Item> for CallbackI<CB, J>
+        where
+            J: IndexedParallelIterator,
+            CB: ProducerCallback<J::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<I>(self, i_producer: I) -> Self::Output
+            where
+                I: Producer<Item = J::Item>,
+            {
+                self.j.with_producer(CallbackJ {
+                    i_producer,
+                    i_len: self.i_len,
+                    j_len: self.j_len,
+                    i_next: self.i_next,
+                    callback: self.callback,
+                })
+            }
+        }
+
+        struct CallbackJ<CB, I> {
+            callback: CB,
+            i_len: usize,
+            j_len: usize,
+            i_next: bool,
+            i_producer: I,
+        }
+
+        impl<CB, I> ProducerCallback<I::Item> for CallbackJ<CB, I>
+        where
+            I: Producer,
+            CB: ProducerCallback<I::Item>,
+        {
+            type Output = CB::Output;
+
+            fn callback<J>(self, j_producer: J) -> Self::Output
+            where
+                J: Producer<Item = I::Item>,
+            {
+                let producer = InterleaveProducer::new(
+                    self.i_producer,
+                    j_producer,
+                    self.i_len,
+                    self.j_len,
+                    self.i_next,
+                );
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    i: I,
+    j: J,
+    i_len: usize,
+    j_len: usize,
+    i_next: bool,
+}
+
+impl<I, J> InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    fn new(i: I, j: J, i_len: usize, j_len: usize, i_next: bool) -> InterleaveProducer<I, J> {
+        InterleaveProducer {
+            i,
+            j,
+            i_len,
+            j_len,
+            i_next,
+        }
+    }
+}
+
+impl<I, J> Producer for InterleaveProducer<I, J>
+where
+    I: Producer,
+    J: Producer<Item = I::Item>,
+{
+    type Item = I::Item;
+    type IntoIter = InterleaveSeq<I::IntoIter, J::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        InterleaveSeq {
+            i: self.i.into_iter().fuse(),
+            j: self.j.into_iter().fuse(),
+            i_next: self.i_next,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.i.min_len(), self.j.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.i.max_len(), self.j.max_len())
+    }
+
+    /// We know 0 < index <= self.i_len + self.j_len
+    ///
+    /// Find a, b satisfying:
+    ///
+    ///  (1) 0 < a <= self.i_len
+    ///  (2) 0 < b <= self.j_len
+    ///  (3) a + b == index
+    ///
+    /// For even splits, set a = b = index/2.
+    /// For odd splits, set a = (index/2)+1, b = index/2, if `i`
+    /// should yield the next element, otherwise, if `j` should yield
+    /// the next element, set a = index/2 and b = (index/2)+1
+    fn split_at(self, index: usize) -> (Self, Self) {
+        #[inline]
+        fn odd_offset(flag: bool) -> usize {
+            (!flag) as usize
+        }
+
+        let even = index % 2 == 0;
+        let idx = index >> 1;
+
+        // desired split
+        let (i_idx, j_idx) = (
+            idx + odd_offset(even || self.i_next),
+            idx + odd_offset(even || !self.i_next),
+        );
+
+        let (i_split, j_split) = if self.i_len >= i_idx && self.j_len >= j_idx {
+            (i_idx, j_idx)
+        } else if self.i_len >= i_idx {
+            // j too short
+            (index - self.j_len, self.j_len)
+        } else {
+            // i too short
+            (self.i_len, index - self.i_len)
+        };
+
+        let trailing_i_next = even == self.i_next;
+        let (i_left, i_right) = self.i.split_at(i_split);
+        let (j_left, j_right) = self.j.split_at(j_split);
+
+        (
+            InterleaveProducer::new(i_left, j_left, i_split, j_split, self.i_next),
+            InterleaveProducer::new(
+                i_right,
+                j_right,
+                self.i_len - i_split,
+                self.j_len - j_split,
+                trailing_i_next,
+            ),
+        )
+    }
+}
+
+/// Wrapper for Interleave to implement DoubleEndedIterator and
+/// ExactSizeIterator.
+///
+/// This iterator is fused.
+struct InterleaveSeq<I, J> {
+    i: Fuse<I>,
+    j: Fuse<J>,
+
+    /// Flag to control which iterator should provide the next element. When
+    /// `false` then `i` produces the next element, otherwise `j` produces the
+    /// next element.
+    i_next: bool,
+}
+
+/// Iterator implementation for InterleaveSeq. This implementation is
+/// taken more or less verbatim from itertools. It is replicated here
+/// (instead of calling itertools directly), because we also need to
+/// implement `DoubledEndedIterator` and `ExactSizeIterator`.
+impl<I, J> Iterator for InterleaveSeq<I, J>
+where
+    I: Iterator,
+    J: Iterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.i_next = !self.i_next;
+        if self.i_next {
+            match self.i.next() {
+                None => self.j.next(),
+                r => r,
+            }
+        } else {
+            match self.j.next() {
+                None => self.i.next(),
+                r => r,
+            }
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let (ih, jh) = (self.i.size_hint(), self.j.size_hint());
+        let min = ih.0.saturating_add(jh.0);
+        let max = match (ih.1, jh.1) {
+            (Some(x), Some(y)) => x.checked_add(y),
+            _ => None,
+        };
+        (min, max)
+    }
+}
+
+// The implementation for DoubleEndedIterator requires
+// ExactSizeIterator to provide `next_back()`. The last element will
+// come from the iterator that runs out last (ie has the most elements
+// in it). If the iterators have the same number of elements, then the
+// last iterator will provide the last element.
+impl<I, J> DoubleEndedIterator for InterleaveSeq<I, J>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    J: DoubleEndedIterator<Item = I::Item> + ExactSizeIterator<Item = I::Item>,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<I::Item> {
+        if self.i.len() == self.j.len() {
+            if self.i_next {
+                self.i.next_back()
+            } else {
+                self.j.next_back()
+            }
+        } else if self.i.len() < self.j.len() {
+            self.j.next_back()
+        } else {
+            self.i.next_back()
+        }
+    }
+}
+
+impl<I, J> ExactSizeIterator for InterleaveSeq<I, J>
+where
+    I: ExactSizeIterator,
+    J: ExactSizeIterator<Item = I::Item>,
+{
+    #[inline]
+    fn len(&self) -> usize {
+        self.i.len() + self.j.len()
+    }
+}
diff --git a/src/iter/interleave_shortest.rs b/src/iter/interleave_shortest.rs
new file mode 100644
index 0000000..7d81369
--- /dev/null
+++ b/src/iter/interleave_shortest.rs
@@ -0,0 +1,85 @@
+use super::plumbing::*;
+use super::*;
+
+/// `InterleaveShortest` is an iterator that works similarly to
+/// `Interleave`, but this version stops returning elements once one
+/// of the iterators run out.
+///
+/// This struct is created by the [`interleave_shortest()`] method on
+/// [`IndexedParallelIterator`].
+///
+/// [`interleave_shortest()`]: trait.IndexedParallelIterator.html#method.interleave_shortest
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    interleave: Interleave<Take<I>, Take<J>>,
+}
+
+impl<I, J> InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    /// Creates a new `InterleaveShortest` iterator
+    pub(super) fn new(i: I, j: J) -> Self {
+        InterleaveShortest {
+            interleave: if i.len() <= j.len() {
+                // take equal lengths from both iterators
+                let n = i.len();
+                i.take(n).interleave(j.take(n))
+            } else {
+                // take one extra item from the first iterator
+                let n = j.len();
+                i.take(n + 1).interleave(j.take(n))
+            },
+        }
+    }
+}
+
+impl<I, J> ParallelIterator for InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<I::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I, J> IndexedParallelIterator for InterleaveShortest<I, J>
+where
+    I: IndexedParallelIterator,
+    J: IndexedParallelIterator<Item = I::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.interleave.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        self.interleave.with_producer(callback)
+    }
+}
diff --git a/src/iter/intersperse.rs b/src/iter/intersperse.rs
new file mode 100644
index 0000000..798bdc1
--- /dev/null
+++ b/src/iter/intersperse.rs
@@ -0,0 +1,410 @@
+use super::plumbing::*;
+use super::*;
+use std::cell::Cell;
+use std::iter::{self, Fuse};
+
+/// `Intersperse` is an iterator that inserts a particular item between each
+/// item of the adapted iterator.  This struct is created by the
+/// [`intersperse()`] method on [`ParallelIterator`]
+///
+/// [`intersperse()`]: trait.ParallelIterator.html#method.intersperse
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug)]
+pub struct Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone,
+{
+    base: I,
+    item: I::Item,
+}
+
+impl<I> Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone,
+{
+    /// Creates a new `Intersperse` iterator
+    pub(super) fn new(base: I, item: I::Item) -> Self {
+        Intersperse { base, item }
+    }
+}
+
+impl<I> ParallelIterator for Intersperse<I>
+where
+    I: ParallelIterator,
+    I::Item: Clone + Send,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<I::Item>,
+    {
+        let consumer1 = IntersperseConsumer::new(consumer, self.item);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        match self.base.opt_len()? {
+            0 => Some(0),
+            len => len.checked_add(len - 1),
+        }
+    }
+}
+
+impl<I> IndexedParallelIterator for Intersperse<I>
+where
+    I: IndexedParallelIterator,
+    I::Item: Clone + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = IntersperseConsumer::new(consumer, self.item);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        let len = self.base.len();
+        if len > 0 {
+            len.checked_add(len - 1).expect("overflow")
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.len();
+        return self.base.with_producer(Callback {
+            callback,
+            item: self.item,
+            len,
+        });
+
+        struct Callback<CB, T> {
+            callback: CB,
+            item: T,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB, T>
+        where
+            CB: ProducerCallback<T>,
+            T: Clone + Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = IntersperseProducer::new(base, self.item, self.len);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct IntersperseProducer<P>
+where
+    P: Producer,
+{
+    base: P,
+    item: P::Item,
+    len: usize,
+    clone_first: bool,
+}
+
+impl<P> IntersperseProducer<P>
+where
+    P: Producer,
+{
+    fn new(base: P, item: P::Item, len: usize) -> Self {
+        IntersperseProducer {
+            base,
+            item,
+            len,
+            clone_first: false,
+        }
+    }
+}
+
+impl<P> Producer for IntersperseProducer<P>
+where
+    P: Producer,
+    P::Item: Clone + Send,
+{
+    type Item = P::Item;
+    type IntoIter = IntersperseIter<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IntersperseIter {
+            base: self.base.into_iter().fuse(),
+            item: self.item,
+            clone_first: self.len > 0 && self.clone_first,
+
+            // If there's more than one item, then even lengths end the opposite
+            // of how they started with respect to interspersed clones.
+            clone_last: self.len > 1 && ((self.len & 1 == 0) ^ self.clone_first),
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert!(index <= self.len);
+
+        // The left needs half of the items from the base producer, and the
+        // other half will be our interspersed item.  If we're not leading with
+        // a cloned item, then we need to round up the base number of items,
+        // otherwise round down.
+        let base_index = (index + !self.clone_first as usize) / 2;
+        let (left_base, right_base) = self.base.split_at(base_index);
+
+        let left = IntersperseProducer {
+            base: left_base,
+            item: self.item.clone(),
+            len: index,
+            clone_first: self.clone_first,
+        };
+
+        let right = IntersperseProducer {
+            base: right_base,
+            item: self.item,
+            len: self.len - index,
+
+            // If the index is odd, the right side toggles `clone_first`.
+            clone_first: (index & 1 == 1) ^ self.clone_first,
+        };
+
+        (left, right)
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        let folder1 = IntersperseFolder {
+            base: folder,
+            item: self.item,
+            clone_first: self.clone_first,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct IntersperseIter<I>
+where
+    I: Iterator,
+{
+    base: Fuse<I>,
+    item: I::Item,
+    clone_first: bool,
+    clone_last: bool,
+}
+
+impl<I> Iterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.clone_first {
+            self.clone_first = false;
+            Some(self.item.clone())
+        } else if let next @ Some(_) = self.base.next() {
+            // If there are any items left, we'll need another clone in front.
+            self.clone_first = self.base.len() != 0;
+            next
+        } else if self.clone_last {
+            self.clone_last = false;
+            Some(self.item.clone())
+        } else {
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.len();
+        (len, Some(len))
+    }
+}
+
+impl<I> DoubleEndedIterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.clone_last {
+            self.clone_last = false;
+            Some(self.item.clone())
+        } else if let next_back @ Some(_) = self.base.next_back() {
+            // If there are any items left, we'll need another clone in back.
+            self.clone_last = self.base.len() != 0;
+            next_back
+        } else if self.clone_first {
+            self.clone_first = false;
+            Some(self.item.clone())
+        } else {
+            None
+        }
+    }
+}
+
+impl<I> ExactSizeIterator for IntersperseIter<I>
+where
+    I: DoubleEndedIterator + ExactSizeIterator,
+    I::Item: Clone,
+{
+    fn len(&self) -> usize {
+        let len = self.base.len();
+        len + len.saturating_sub(1) + self.clone_first as usize + self.clone_last as usize
+    }
+}
+
+struct IntersperseConsumer<C, T> {
+    base: C,
+    item: T,
+    clone_first: Cell<bool>,
+}
+
+impl<C, T> IntersperseConsumer<C, T>
+where
+    C: Consumer<T>,
+{
+    fn new(base: C, item: T) -> Self {
+        IntersperseConsumer {
+            base,
+            item,
+            clone_first: false.into(),
+        }
+    }
+}
+
+impl<C, T> Consumer<T> for IntersperseConsumer<C, T>
+where
+    C: Consumer<T>,
+    T: Clone + Send,
+{
+    type Folder = IntersperseFolder<C::Folder, T>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(mut self, index: usize) -> (Self, Self, Self::Reducer) {
+        // We'll feed twice as many items to the base consumer, except if we're
+        // not currently leading with a cloned item, then it's one less.
+        let base_index = index + index.saturating_sub(!self.clone_first.get() as usize);
+        let (left, right, reducer) = self.base.split_at(base_index);
+
+        let right = IntersperseConsumer {
+            base: right,
+            item: self.item.clone(),
+            clone_first: true.into(),
+        };
+        self.base = left;
+        (self, right, reducer)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        IntersperseFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            clone_first: self.clone_first.get(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<C, T> UnindexedConsumer<T> for IntersperseConsumer<C, T>
+where
+    C: UnindexedConsumer<T>,
+    T: Clone + Send,
+{
+    fn split_off_left(&self) -> Self {
+        let left = IntersperseConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            clone_first: self.clone_first.clone(),
+        };
+        self.clone_first.set(true);
+        left
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct IntersperseFolder<C, T> {
+    base: C,
+    item: T,
+    clone_first: bool,
+}
+
+impl<C, T> Folder<T> for IntersperseFolder<C, T>
+where
+    C: Folder<T>,
+    T: Clone,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        if self.clone_first {
+            self.base = self.base.consume(self.item.clone());
+            if self.base.full() {
+                return self;
+            }
+        } else {
+            self.clone_first = true;
+        }
+        self.base = self.base.consume(item);
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let mut clone_first = self.clone_first;
+        let between_item = self.item;
+        let base = self.base.consume_iter(iter.into_iter().flat_map(|item| {
+            let first = if clone_first {
+                Some(between_item.clone())
+            } else {
+                clone_first = true;
+                None
+            };
+            first.into_iter().chain(iter::once(item))
+        }));
+        IntersperseFolder {
+            base,
+            item: between_item,
+            clone_first,
+        }
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/len.rs b/src/iter/len.rs
new file mode 100644
index 0000000..e65b3c0
--- /dev/null
+++ b/src/iter/len.rs
@@ -0,0 +1,271 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+
+/// `MinLen` is an iterator that imposes a minimum length on iterator splits.
+/// This struct is created by the [`min_len()`] method on [`IndexedParallelIterator`]
+///
+/// [`min_len()`]: trait.IndexedParallelIterator.html#method.min_len
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct MinLen<I: IndexedParallelIterator> {
+    base: I,
+    min: usize,
+}
+
+impl<I> MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `MinLen` iterator.
+    pub(super) fn new(base: I, min: usize) -> Self {
+        MinLen { base, min }
+    }
+}
+
+impl<I> ParallelIterator for MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for MinLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            min: self.min,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            min: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MinLenProducer {
+                    base,
+                    min: self.min,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// `MinLenProducer` implementation
+
+struct MinLenProducer<P> {
+    base: P,
+    min: usize,
+}
+
+impl<P> Producer for MinLenProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = P::IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter()
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.min, self.base.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MinLenProducer {
+                base: left,
+                min: self.min,
+            },
+            MinLenProducer {
+                base: right,
+                min: self.min,
+            },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(folder)
+    }
+}
+
+/// `MaxLen` is an iterator that imposes a maximum length on iterator splits.
+/// This struct is created by the [`max_len()`] method on [`IndexedParallelIterator`]
+///
+/// [`max_len()`]: trait.IndexedParallelIterator.html#method.max_len
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct MaxLen<I: IndexedParallelIterator> {
+    base: I,
+    max: usize,
+}
+
+impl<I> MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `MaxLen` iterator.
+    pub(super) fn new(base: I, max: usize) -> Self {
+        MaxLen { base, max }
+    }
+}
+
+impl<I> ParallelIterator for MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for MaxLen<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            max: self.max,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            max: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MaxLenProducer {
+                    base,
+                    max: self.max,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// `MaxLenProducer` implementation
+
+struct MaxLenProducer<P> {
+    base: P,
+    max: usize,
+}
+
+impl<P> Producer for MaxLenProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = P::IntoIter;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.max, self.base.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MaxLenProducer {
+                base: left,
+                max: self.max,
+            },
+            MaxLenProducer {
+                base: right,
+                max: self.max,
+            },
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.base.fold_with(folder)
+    }
+}
diff --git a/src/iter/map.rs b/src/iter/map.rs
new file mode 100644
index 0000000..f2a35ff
--- /dev/null
+++ b/src/iter/map.rs
@@ -0,0 +1,259 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+use std::iter;
+
+/// `Map` is an iterator that transforms the elements of an underlying iterator.
+///
+/// This struct is created by the [`map()`] method on [`ParallelIterator`]
+///
+/// [`map()`]: trait.ParallelIterator.html#method.map
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Map<I: ParallelIterator, F> {
+    base: I,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Map<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Map").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Map<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Map` iterator.
+    pub(super) fn new(base: I, map_op: F) -> Self {
+        Map { base, map_op }
+    }
+}
+
+impl<I, F, R> ParallelIterator for Map<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = F::Output;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapConsumer::new(consumer, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F, R> IndexedParallelIterator for Map<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapConsumer::new(consumer, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            map_op: F,
+        }
+
+        impl<T, F, R, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<R>,
+            F: Fn(T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapProducer {
+                    base,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapProducer<'f, P, F> {
+    base: P,
+    map_op: &'f F,
+}
+
+impl<'f, P, F, R> Producer for MapProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = F::Output;
+    type IntoIter = iter::Map<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().map(self.map_op)
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapProducer {
+                base: left,
+                map_op: self.map_op,
+            },
+            MapProducer {
+                base: right,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapFolder {
+            base: folder,
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapConsumer<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, C, F> MapConsumer<'f, C, F> {
+    fn new(base: C, map_op: &'f F) -> Self {
+        MapConsumer { base, map_op }
+    }
+}
+
+impl<'f, T, R, C, F> Consumer<T> for MapConsumer<'f, C, F>
+where
+    C: Consumer<F::Output>,
+    F: Fn(T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapConsumer::new(left, self.map_op),
+            MapConsumer::new(right, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapFolder {
+            base: self.base.into_folder(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, R, C, F> UnindexedConsumer<T> for MapConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<F::Output>,
+    F: Fn(T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapConsumer::new(self.base.split_off_left(), &self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct MapFolder<'f, C, F> {
+    base: C,
+    map_op: &'f F,
+}
+
+impl<'f, T, R, C, F> Folder<T> for MapFolder<'f, C, F>
+where
+    C: Folder<F::Output>,
+    F: Fn(T) -> R,
+{
+    type Result = C::Result;
+
+    fn consume(self, item: T) -> Self {
+        let mapped_item = (self.map_op)(item);
+        MapFolder {
+            base: self.base.consume(mapped_item),
+            map_op: self.map_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        self.base = self.base.consume_iter(iter.into_iter().map(self.map_op));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/map_with.rs b/src/iter/map_with.rs
new file mode 100644
index 0000000..10b1b4c
--- /dev/null
+++ b/src/iter/map_with.rs
@@ -0,0 +1,573 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `MapWith` is an iterator that transforms the elements of an underlying iterator.
+///
+/// This struct is created by the [`map_with()`] method on [`ParallelIterator`]
+///
+/// [`map_with()`]: trait.ParallelIterator.html#method.map_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct MapWith<I: ParallelIterator, T, F> {
+    base: I,
+    item: T,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, T: Debug, F> Debug for MapWith<I, T, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("MapWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<I, T, F> MapWith<I, T, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `MapWith` iterator.
+    pub(super) fn new(base: I, item: T, map_op: F) -> Self {
+        MapWith { base, item, map_op }
+    }
+}
+
+impl<I, T, F, R> ParallelIterator for MapWith<I, T, F>
+where
+    I: ParallelIterator,
+    T: Send + Clone,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, T, F, R> IndexedParallelIterator for MapWith<I, T, F>
+where
+    I: IndexedParallelIterator,
+    T: Send + Clone,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            item: self.item,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, U, F> {
+            callback: CB,
+            item: U,
+            map_op: F,
+        }
+
+        impl<T, U, F, R, CB> ProducerCallback<T> for Callback<CB, U, F>
+        where
+            CB: ProducerCallback<R>,
+            U: Send + Clone,
+            F: Fn(&mut U, T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapWithProducer {
+                    base,
+                    item: self.item,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapWithProducer<'f, P, U, F> {
+    base: P,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, P, U, F, R> Producer for MapWithProducer<'f, P, U, F>
+where
+    P: Producer,
+    U: Send + Clone,
+    F: Fn(&mut U, P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+    type IntoIter = MapWithIter<'f, P::IntoIter, U, F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        MapWithIter {
+            base: self.base.into_iter(),
+            item: self.item,
+            map_op: self.map_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapWithProducer {
+                base: left,
+                item: self.item.clone(),
+                map_op: self.map_op,
+            },
+            MapWithProducer {
+                base: right,
+                item: self.item,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapWithFolder {
+            base: folder,
+            item: self.item,
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct MapWithIter<'f, I, U, F> {
+    base: I,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, I, U, F, R> Iterator for MapWithIter<'f, I, U, F>
+where
+    I: Iterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+
+    fn next(&mut self) -> Option<R> {
+        let item = self.base.next()?;
+        Some((self.map_op)(&mut self.item, item))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+}
+
+impl<'f, I, U, F, R> DoubleEndedIterator for MapWithIter<'f, I, U, F>
+where
+    I: DoubleEndedIterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+    fn next_back(&mut self) -> Option<R> {
+        let item = self.base.next_back()?;
+        Some((self.map_op)(&mut self.item, item))
+    }
+}
+
+impl<'f, I, U, F, R> ExactSizeIterator for MapWithIter<'f, I, U, F>
+where
+    I: ExactSizeIterator,
+    F: Fn(&mut U, I::Item) -> R + Sync,
+    R: Send,
+{
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapWithConsumer<'f, C, U, F> {
+    base: C,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, C, U, F> MapWithConsumer<'f, C, U, F> {
+    fn new(base: C, item: U, map_op: &'f F) -> Self {
+        MapWithConsumer { base, item, map_op }
+    }
+}
+
+impl<'f, T, U, R, C, F> Consumer<T> for MapWithConsumer<'f, C, U, F>
+where
+    C: Consumer<R>,
+    U: Send + Clone,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapWithFolder<'f, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapWithConsumer::new(left, self.item.clone(), self.map_op),
+            MapWithConsumer::new(right, self.item, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapWithFolder {
+            base: self.base.into_folder(),
+            item: self.item,
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, U, R, C, F> UnindexedConsumer<T> for MapWithConsumer<'f, C, U, F>
+where
+    C: UnindexedConsumer<R>,
+    U: Send + Clone,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapWithConsumer::new(self.base.split_off_left(), self.item.clone(), self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct MapWithFolder<'f, C, U, F> {
+    base: C,
+    item: U,
+    map_op: &'f F,
+}
+
+impl<'f, T, U, R, C, F> Folder<T> for MapWithFolder<'f, C, U, F>
+where
+    C: Folder<R>,
+    F: Fn(&mut U, T) -> R,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let mapped_item = (self.map_op)(&mut self.item, item);
+        self.base = self.base.consume(mapped_item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn with<'f, T, U, R>(
+            item: &'f mut U,
+            map_op: impl Fn(&mut U, T) -> R + 'f,
+        ) -> impl FnMut(T) -> R + 'f {
+            move |x| map_op(item, x)
+        }
+
+        {
+            let mapped_iter = iter.into_iter().map(with(&mut self.item, self.map_op));
+            self.base = self.base.consume_iter(mapped_iter);
+        }
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+// ------------------------------------------------------------------------------------------------
+
+/// `MapInit` is an iterator that transforms the elements of an underlying iterator.
+///
+/// This struct is created by the [`map_init()`] method on [`ParallelIterator`]
+///
+/// [`map_init()`]: trait.ParallelIterator.html#method.map_init
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct MapInit<I: ParallelIterator, INIT, F> {
+    base: I,
+    init: INIT,
+    map_op: F,
+}
+
+impl<I: ParallelIterator + Debug, INIT, F> Debug for MapInit<I, INIT, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("MapInit").field("base", &self.base).finish()
+    }
+}
+
+impl<I, INIT, F> MapInit<I, INIT, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `MapInit` iterator.
+    pub(super) fn new(base: I, init: INIT, map_op: F) -> Self {
+        MapInit { base, init, map_op }
+    }
+}
+
+impl<I, INIT, T, F, R> ParallelIterator for MapInit<I, INIT, F>
+where
+    I: ParallelIterator,
+    INIT: Fn() -> T + Sync + Send,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    type Item = R;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, INIT, T, F, R> IndexedParallelIterator for MapInit<I, INIT, F>
+where
+    I: IndexedParallelIterator,
+    INIT: Fn() -> T + Sync + Send,
+    F: Fn(&mut T, I::Item) -> R + Sync + Send,
+    R: Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            init: self.init,
+            map_op: self.map_op,
+        });
+
+        struct Callback<CB, INIT, F> {
+            callback: CB,
+            init: INIT,
+            map_op: F,
+        }
+
+        impl<T, INIT, U, F, R, CB> ProducerCallback<T> for Callback<CB, INIT, F>
+        where
+            CB: ProducerCallback<R>,
+            INIT: Fn() -> U + Sync,
+            F: Fn(&mut U, T) -> R + Sync,
+            R: Send,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = MapInitProducer {
+                    base,
+                    init: &self.init,
+                    map_op: &self.map_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct MapInitProducer<'f, P, INIT, F> {
+    base: P,
+    init: &'f INIT,
+    map_op: &'f F,
+}
+
+impl<'f, P, INIT, U, F, R> Producer for MapInitProducer<'f, P, INIT, F>
+where
+    P: Producer,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, P::Item) -> R + Sync,
+    R: Send,
+{
+    type Item = R;
+    type IntoIter = MapWithIter<'f, P::IntoIter, U, F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        MapWithIter {
+            base: self.base.into_iter(),
+            item: (self.init)(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            MapInitProducer {
+                base: left,
+                init: self.init,
+                map_op: self.map_op,
+            },
+            MapInitProducer {
+                base: right,
+                init: self.init,
+                map_op: self.map_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = MapWithFolder {
+            base: folder,
+            item: (self.init)(),
+            map_op: self.map_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct MapInitConsumer<'f, C, INIT, F> {
+    base: C,
+    init: &'f INIT,
+    map_op: &'f F,
+}
+
+impl<'f, C, INIT, F> MapInitConsumer<'f, C, INIT, F> {
+    fn new(base: C, init: &'f INIT, map_op: &'f F) -> Self {
+        MapInitConsumer { base, init, map_op }
+    }
+}
+
+impl<'f, T, INIT, U, R, C, F> Consumer<T> for MapInitConsumer<'f, C, INIT, F>
+where
+    C: Consumer<R>,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    type Folder = MapWithFolder<'f, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            MapInitConsumer::new(left, self.init, self.map_op),
+            MapInitConsumer::new(right, self.init, self.map_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        MapWithFolder {
+            base: self.base.into_folder(),
+            item: (self.init)(),
+            map_op: self.map_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, INIT, U, R, C, F> UnindexedConsumer<T> for MapInitConsumer<'f, C, INIT, F>
+where
+    C: UnindexedConsumer<R>,
+    INIT: Fn() -> U + Sync,
+    F: Fn(&mut U, T) -> R + Sync,
+    R: Send,
+{
+    fn split_off_left(&self) -> Self {
+        MapInitConsumer::new(self.base.split_off_left(), self.init, self.map_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
new file mode 100644
index 0000000..0c82933
--- /dev/null
+++ b/src/iter/mod.rs
@@ -0,0 +1,3121 @@
+//! Traits for writing parallel programs using an iterator-style interface
+//!
+//! You will rarely need to interact with this module directly unless you have
+//! need to name one of the iterator types.
+//!
+//! Parallel iterators make it easy to write iterator-like chains that
+//! execute in parallel: typically all you have to do is convert the
+//! first `.iter()` (or `iter_mut()`, `into_iter()`, etc) method into
+//! `par_iter()` (or `par_iter_mut()`, `into_par_iter()`, etc). For
+//! example, to compute the sum of the squares of a sequence of
+//! integers, one might write:
+//!
+//! ```rust
+//! use rayon::prelude::*;
+//! fn sum_of_squares(input: &[i32]) -> i32 {
+//!     input.par_iter()
+//!          .map(|i| i * i)
+//!          .sum()
+//! }
+//! ```
+//!
+//! Or, to increment all the integers in a slice, you could write:
+//!
+//! ```rust
+//! use rayon::prelude::*;
+//! fn increment_all(input: &mut [i32]) {
+//!     input.par_iter_mut()
+//!          .for_each(|p| *p += 1);
+//! }
+//! ```
+//!
+//! To use parallel iterators, first import the traits by adding
+//! something like `use rayon::prelude::*` to your module. You can
+//! then call `par_iter`, `par_iter_mut`, or `into_par_iter` to get a
+//! parallel iterator. Like a [regular iterator][], parallel
+//! iterators work by first constructing a computation and then
+//! executing it.
+//!
+//! In addition to `par_iter()` and friends, some types offer other
+//! ways to create (or consume) parallel iterators:
+//!
+//! - Slices (`&[T]`, `&mut [T]`) offer methods like `par_split` and
+//!   `par_windows`, as well as various parallel sorting
+//!   operations. See [the `ParallelSlice` trait] for the full list.
+//! - Strings (`&str`) offer methods like `par_split` and `par_lines`.
+//!   See [the `ParallelString` trait] for the full list.
+//! - Various collections offer [`par_extend`], which grows a
+//!   collection given a parallel iterator. (If you don't have a
+//!   collection to extend, you can use [`collect()`] to create a new
+//!   one from scratch.)
+//!
+//! [the `ParallelSlice` trait]: ../slice/trait.ParallelSlice.html
+//! [the `ParallelString` trait]: ../str/trait.ParallelString.html
+//! [`par_extend`]: trait.ParallelExtend.html
+//! [`collect()`]: trait.ParallelIterator.html#method.collect
+//!
+//! To see the full range of methods available on parallel iterators,
+//! check out the [`ParallelIterator`] and [`IndexedParallelIterator`]
+//! traits.
+//!
+//! If you'd like to build a custom parallel iterator, or to write your own
+//! combinator, then check out the [split] function and the [plumbing] module.
+//!
+//! [regular iterator]: http://doc.rust-lang.org/std/iter/trait.Iterator.html
+//! [`ParallelIterator`]: trait.ParallelIterator.html
+//! [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+//! [split]: fn.split.html
+//! [plumbing]: plumbing/index.html
+//!
+//! Note: Several of the `ParallelIterator` methods rely on a `Try` trait which
+//! has been deliberately obscured from the public API.  This trait is intended
+//! to mirror the unstable `std::ops::Try` with implementations for `Option` and
+//! `Result`, where `Some`/`Ok` values will let those iterators continue, but
+//! `None`/`Err` values will exit early.
+//!
+//! A note about object safety: It is currently _not_ possible to wrap
+//! a `ParallelIterator` (or any trait that depends on it) using a
+//! `Box<dyn ParallelIterator>` or other kind of dynamic allocation,
+//! because `ParallelIterator` is **not object-safe**.
+//! (This keeps the implementation simpler and allows extra optimizations.)
+
+use self::plumbing::*;
+use self::private::Try;
+pub use either::Either;
+use std::cmp::{self, Ordering};
+use std::iter::{Product, Sum};
+use std::ops::{Fn, RangeBounds};
+
+pub mod plumbing;
+
+#[cfg(test)]
+mod test;
+
+// There is a method to the madness here:
+//
+// - These modules are private but expose certain types to the end-user
+//   (e.g., `enumerate::Enumerate`) -- specifically, the types that appear in the
+//   public API surface of the `ParallelIterator` traits.
+// - In **this** module, those public types are always used unprefixed, which forces
+//   us to add a `pub use` and helps identify if we missed anything.
+// - In contrast, items that appear **only** in the body of a method,
+//   e.g. `find::find()`, are always used **prefixed**, so that they
+//   can be readily distinguished.
+
+mod chain;
+mod chunks;
+mod cloned;
+mod collect;
+mod copied;
+mod empty;
+mod enumerate;
+mod extend;
+mod filter;
+mod filter_map;
+mod find;
+mod find_first_last;
+mod flat_map;
+mod flat_map_iter;
+mod flatten;
+mod flatten_iter;
+mod fold;
+mod for_each;
+mod from_par_iter;
+mod inspect;
+mod interleave;
+mod interleave_shortest;
+mod intersperse;
+mod len;
+mod map;
+mod map_with;
+mod multizip;
+mod noop;
+mod once;
+mod panic_fuse;
+mod par_bridge;
+mod positions;
+mod product;
+mod reduce;
+mod repeat;
+mod rev;
+mod skip;
+mod splitter;
+mod sum;
+mod take;
+mod try_fold;
+mod try_reduce;
+mod try_reduce_with;
+mod unzip;
+mod update;
+mod while_some;
+mod zip;
+mod zip_eq;
+
+pub use self::{
+    chain::Chain,
+    chunks::Chunks,
+    cloned::Cloned,
+    copied::Copied,
+    empty::{empty, Empty},
+    enumerate::Enumerate,
+    filter::Filter,
+    filter_map::FilterMap,
+    flat_map::FlatMap,
+    flat_map_iter::FlatMapIter,
+    flatten::Flatten,
+    flatten_iter::FlattenIter,
+    fold::{Fold, FoldWith},
+    inspect::Inspect,
+    interleave::Interleave,
+    interleave_shortest::InterleaveShortest,
+    intersperse::Intersperse,
+    len::{MaxLen, MinLen},
+    map::Map,
+    map_with::{MapInit, MapWith},
+    multizip::MultiZip,
+    once::{once, Once},
+    panic_fuse::PanicFuse,
+    par_bridge::{IterBridge, ParallelBridge},
+    positions::Positions,
+    repeat::{repeat, repeatn, Repeat, RepeatN},
+    rev::Rev,
+    skip::Skip,
+    splitter::{split, Split},
+    take::Take,
+    try_fold::{TryFold, TryFoldWith},
+    update::Update,
+    while_some::WhileSome,
+    zip::Zip,
+    zip_eq::ZipEq,
+};
+
+mod step_by;
+#[cfg(step_by)]
+pub use self::step_by::StepBy;
+
+/// `IntoParallelIterator` implements the conversion to a [`ParallelIterator`].
+///
+/// By implementing `IntoParallelIterator` for a type, you define how it will
+/// transformed into an iterator. This is a parallel version of the standard
+/// library's [`std::iter::IntoIterator`] trait.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`std::iter::IntoIterator`]: https://doc.rust-lang.org/std/iter/trait.IntoIterator.html
+pub trait IntoParallelIterator {
+    /// The parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    type Item: Send;
+
+    /// Converts `self` into a parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// println!("counting in parallel:");
+    /// (0..100).into_par_iter()
+    ///     .for_each(|i| println!("{}", i));
+    /// ```
+    ///
+    /// This conversion is often implicit for arguments to methods like [`zip`].
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let v: Vec<_> = (0..5).into_par_iter().zip(5..10).collect();
+    /// assert_eq!(v, [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]);
+    /// ```
+    ///
+    /// [`zip`]: trait.IndexedParallelIterator.html#method.zip
+    fn into_par_iter(self) -> Self::Iter;
+}
+
+/// `IntoParallelRefIterator` implements the conversion to a
+/// [`ParallelIterator`], providing shared references to the data.
+///
+/// This is a parallel version of the `iter()` method
+/// defined by various collections.
+///
+/// This trait is automatically implemented
+/// `for I where &I: IntoParallelIterator`. In most cases, users
+/// will want to implement [`IntoParallelIterator`] rather than implement
+/// this trait directly.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html
+pub trait IntoParallelRefIterator<'data> {
+    /// The type of the parallel iterator that will be returned.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This will typically be an `&'data T` reference type.
+    type Item: Send + 'data;
+
+    /// Converts `self` into a parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let v: Vec<_> = (0..100).collect();
+    /// assert_eq!(v.par_iter().sum::<i32>(), 100 * 99 / 2);
+    ///
+    /// // `v.par_iter()` is shorthand for `(&v).into_par_iter()`,
+    /// // producing the exact same references.
+    /// assert!(v.par_iter().zip(&v)
+    ///          .all(|(a, b)| std::ptr::eq(a, b)));
+    /// ```
+    fn par_iter(&'data self) -> Self::Iter;
+}
+
+impl<'data, I: 'data + ?Sized> IntoParallelRefIterator<'data> for I
+where
+    &'data I: IntoParallelIterator,
+{
+    type Iter = <&'data I as IntoParallelIterator>::Iter;
+    type Item = <&'data I as IntoParallelIterator>::Item;
+
+    fn par_iter(&'data self) -> Self::Iter {
+        self.into_par_iter()
+    }
+}
+
+/// `IntoParallelRefMutIterator` implements the conversion to a
+/// [`ParallelIterator`], providing mutable references to the data.
+///
+/// This is a parallel version of the `iter_mut()` method
+/// defined by various collections.
+///
+/// This trait is automatically implemented
+/// `for I where &mut I: IntoParallelIterator`. In most cases, users
+/// will want to implement [`IntoParallelIterator`] rather than implement
+/// this trait directly.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html
+pub trait IntoParallelRefMutIterator<'data> {
+    /// The type of iterator that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that will be produced; this is typically an
+    /// `&'data mut T` reference.
+    type Item: Send + 'data;
+
+    /// Creates the parallel iterator from `self`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0usize; 5];
+    /// v.par_iter_mut().enumerate().for_each(|(i, x)| *x = i);
+    /// assert_eq!(v, [0, 1, 2, 3, 4]);
+    /// ```
+    fn par_iter_mut(&'data mut self) -> Self::Iter;
+}
+
+impl<'data, I: 'data + ?Sized> IntoParallelRefMutIterator<'data> for I
+where
+    &'data mut I: IntoParallelIterator,
+{
+    type Iter = <&'data mut I as IntoParallelIterator>::Iter;
+    type Item = <&'data mut I as IntoParallelIterator>::Item;
+
+    fn par_iter_mut(&'data mut self) -> Self::Iter {
+        self.into_par_iter()
+    }
+}
+
+/// Parallel version of the standard iterator trait.
+///
+/// The combinators on this trait are available on **all** parallel
+/// iterators.  Additional methods can be found on the
+/// [`IndexedParallelIterator`] trait: those methods are only
+/// available for parallel iterators where the number of items is
+/// known in advance (so, e.g., after invoking `filter`, those methods
+/// become unavailable).
+///
+/// For examples of using parallel iterators, see [the docs on the
+/// `iter` module][iter].
+///
+/// [iter]: index.html
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+pub trait ParallelIterator: Sized + Send {
+    /// The type of item that this parallel iterator produces.
+    /// For example, if you use the [`for_each`] method, this is the type of
+    /// item that your closure will be invoked with.
+    ///
+    /// [`for_each`]: #method.for_each
+    type Item: Send;
+
+    /// Executes `OP` on each item produced by the iterator, in parallel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// (0..100).into_par_iter().for_each(|x| println!("{:?}", x));
+    /// ```
+    fn for_each<OP>(self, op: OP)
+    where
+        OP: Fn(Self::Item) + Sync + Send,
+    {
+        for_each::for_each(self, &op)
+    }
+
+    /// Executes `OP` on the given `init` value with each item produced by
+    /// the iterator, in parallel.
+    ///
+    /// The `init` value will be cloned only as needed to be paired with
+    /// the group of items in each rayon job.  It does not require the type
+    /// to be `Sync`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// (0..5).into_par_iter().for_each_with(sender, |s, x| s.send(x).unwrap());
+    ///
+    /// let mut res: Vec<_> = receiver.iter().collect();
+    ///
+    /// res.sort();
+    ///
+    /// assert_eq!(&res[..], &[0, 1, 2, 3, 4])
+    /// ```
+    fn for_each_with<OP, T>(self, init: T, op: OP)
+    where
+        OP: Fn(&mut T, Self::Item) + Sync + Send,
+        T: Send + Clone,
+    {
+        self.map_with(init, op).collect()
+    }
+
+    /// Executes `OP` on a value returned by `init` with each item produced by
+    /// the iterator, in parallel.
+    ///
+    /// The `init` function will be called only as needed for a value to be
+    /// paired with the group of items in each rayon job.  There is no
+    /// constraint on that returned type at all!
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0u8; 1_000_000];
+    ///
+    /// v.par_chunks_mut(1000)
+    ///     .for_each_init(
+    ///         || rand::thread_rng(),
+    ///         |rng, chunk| rng.fill(chunk),
+    ///     );
+    ///
+    /// // There's a remote chance that this will fail...
+    /// for i in 0u8..=255 {
+    ///     assert!(v.contains(&i));
+    /// }
+    /// ```
+    fn for_each_init<OP, INIT, T>(self, init: INIT, op: OP)
+    where
+        OP: Fn(&mut T, Self::Item) + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+    {
+        self.map_init(init, op).collect()
+    }
+
+    /// Executes a fallible `OP` on each item produced by the iterator, in parallel.
+    ///
+    /// If the `OP` returns `Result::Err` or `Option::None`, we will attempt to
+    /// stop processing the rest of the items in the iterator as soon as
+    /// possible, and we will return that terminating value.  Otherwise, we will
+    /// return an empty `Result::Ok(())` or `Option::Some(())`.  If there are
+    /// multiple errors in parallel, it is not specified which will be returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::io::{self, Write};
+    ///
+    /// // This will stop iteration early if there's any write error, like
+    /// // having piped output get closed on the other end.
+    /// (0..100).into_par_iter()
+    ///     .try_for_each(|x| writeln!(io::stdout(), "{:?}", x))
+    ///     .expect("expected no write errors");
+    /// ```
+    fn try_for_each<OP, R>(self, op: OP) -> R
+    where
+        OP: Fn(Self::Item) -> R + Sync + Send,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map(op).try_reduce(<()>::default, ok)
+    }
+
+    /// Executes a fallible `OP` on the given `init` value with each item
+    /// produced by the iterator, in parallel.
+    ///
+    /// This combines the `init` semantics of [`for_each_with()`] and the
+    /// failure semantics of [`try_for_each()`].
+    ///
+    /// [`for_each_with()`]: #method.for_each_with
+    /// [`try_for_each()`]: #method.try_for_each
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// (0..5).into_par_iter()
+    ///     .try_for_each_with(sender, |s, x| s.send(x))
+    ///     .expect("expected no send errors");
+    ///
+    /// let mut res: Vec<_> = receiver.iter().collect();
+    ///
+    /// res.sort();
+    ///
+    /// assert_eq!(&res[..], &[0, 1, 2, 3, 4])
+    /// ```
+    fn try_for_each_with<OP, T, R>(self, init: T, op: OP) -> R
+    where
+        OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        T: Send + Clone,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map_with(init, op).try_reduce(<()>::default, ok)
+    }
+
+    /// Executes a fallible `OP` on a value returned by `init` with each item
+    /// produced by the iterator, in parallel.
+    ///
+    /// This combines the `init` semantics of [`for_each_init()`] and the
+    /// failure semantics of [`try_for_each()`].
+    ///
+    /// [`for_each_init()`]: #method.for_each_init
+    /// [`try_for_each()`]: #method.try_for_each
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = vec![0u8; 1_000_000];
+    ///
+    /// v.par_chunks_mut(1000)
+    ///     .try_for_each_init(
+    ///         || rand::thread_rng(),
+    ///         |rng, chunk| rng.try_fill(chunk),
+    ///     )
+    ///     .expect("expected no rand errors");
+    ///
+    /// // There's a remote chance that this will fail...
+    /// for i in 0u8..=255 {
+    ///     assert!(v.contains(&i));
+    /// }
+    /// ```
+    fn try_for_each_init<OP, INIT, T, R>(self, init: INIT, op: OP) -> R
+    where
+        OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+        R: Try<Ok = ()> + Send,
+    {
+        fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
+            R::from_ok(())
+        }
+
+        self.map_init(init, op).try_reduce(<()>::default, ok)
+    }
+
+    /// Counts the number of items in this parallel iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let count = (0..100).into_par_iter().count();
+    ///
+    /// assert_eq!(count, 100);
+    /// ```
+    fn count(self) -> usize {
+        fn one<T>(_: T) -> usize {
+            1
+        }
+
+        self.map(one).sum()
+    }
+
+    /// Applies `map_op` to each item of this iterator, producing a new
+    /// iterator with the results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..5).into_par_iter().map(|x| x * 2);
+    ///
+    /// let doubles: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn map<F, R>(self, map_op: F) -> Map<Self, F>
+    where
+        F: Fn(Self::Item) -> R + Sync + Send,
+        R: Send,
+    {
+        Map::new(self, map_op)
+    }
+
+    /// Applies `map_op` to the given `init` value with each item of this
+    /// iterator, producing a new iterator with the results.
+    ///
+    /// The `init` value will be cloned only as needed to be paired with
+    /// the group of items in each rayon job.  It does not require the type
+    /// to be `Sync`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::sync::mpsc::channel;
+    /// use rayon::prelude::*;
+    ///
+    /// let (sender, receiver) = channel();
+    ///
+    /// let a: Vec<_> = (0..5)
+    ///                 .into_par_iter()            // iterating over i32
+    ///                 .map_with(sender, |s, x| {
+    ///                     s.send(x).unwrap();     // sending i32 values through the channel
+    ///                     x                       // returning i32
+    ///                 })
+    ///                 .collect();                 // collecting the returned values into a vector
+    ///
+    /// let mut b: Vec<_> = receiver.iter()         // iterating over the values in the channel
+    ///                             .collect();     // and collecting them
+    /// b.sort();
+    ///
+    /// assert_eq!(a, b);
+    /// ```
+    fn map_with<F, T, R>(self, init: T, map_op: F) -> MapWith<Self, T, F>
+    where
+        F: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        T: Send + Clone,
+        R: Send,
+    {
+        MapWith::new(self, init, map_op)
+    }
+
+    /// Applies `map_op` to a value returned by `init` with each item of this
+    /// iterator, producing a new iterator with the results.
+    ///
+    /// The `init` function will be called only as needed for a value to be
+    /// paired with the group of items in each rayon job.  There is no
+    /// constraint on that returned type at all!
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rand::Rng;
+    /// use rayon::prelude::*;
+    ///
+    /// let a: Vec<_> = (1i32..1_000_000)
+    ///     .into_par_iter()
+    ///     .map_init(
+    ///         || rand::thread_rng(),  // get the thread-local RNG
+    ///         |rng, x| if rng.gen() { // randomly negate items
+    ///             -x
+    ///         } else {
+    ///             x
+    ///         },
+    ///     ).collect();
+    ///
+    /// // There's a remote chance that this will fail...
+    /// assert!(a.iter().any(|&x| x < 0));
+    /// assert!(a.iter().any(|&x| x > 0));
+    /// ```
+    fn map_init<F, INIT, T, R>(self, init: INIT, map_op: F) -> MapInit<Self, INIT, F>
+    where
+        F: Fn(&mut T, Self::Item) -> R + Sync + Send,
+        INIT: Fn() -> T + Sync + Send,
+        R: Send,
+    {
+        MapInit::new(self, init, map_op)
+    }
+
+    /// Creates an iterator which clones all of its elements.  This may be
+    /// useful when you have an iterator over `&T`, but you need `T`, and
+    /// that type implements `Clone`. See also [`copied()`].
+    ///
+    /// [`copied()`]: #method.copied
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3];
+    ///
+    /// let v_cloned: Vec<_> = a.par_iter().cloned().collect();
+    ///
+    /// // cloned is the same as .map(|&x| x), for integers
+    /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect();
+    ///
+    /// assert_eq!(v_cloned, vec![1, 2, 3]);
+    /// assert_eq!(v_map, vec![1, 2, 3]);
+    /// ```
+    fn cloned<'a, T>(self) -> Cloned<Self>
+    where
+        T: 'a + Clone + Send,
+        Self: ParallelIterator<Item = &'a T>,
+    {
+        Cloned::new(self)
+    }
+
+    /// Creates an iterator which copies all of its elements.  This may be
+    /// useful when you have an iterator over `&T`, but you need `T`, and
+    /// that type implements `Copy`. See also [`cloned()`].
+    ///
+    /// [`cloned()`]: #method.cloned
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3];
+    ///
+    /// let v_copied: Vec<_> = a.par_iter().copied().collect();
+    ///
+    /// // copied is the same as .map(|&x| x), for integers
+    /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect();
+    ///
+    /// assert_eq!(v_copied, vec![1, 2, 3]);
+    /// assert_eq!(v_map, vec![1, 2, 3]);
+    /// ```
+    fn copied<'a, T>(self) -> Copied<Self>
+    where
+        T: 'a + Copy + Send,
+        Self: ParallelIterator<Item = &'a T>,
+    {
+        Copied::new(self)
+    }
+
+    /// Applies `inspect_op` to a reference to each item of this iterator,
+    /// producing a new iterator passing through the original items.  This is
+    /// often useful for debugging to see what's happening in iterator stages.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 4, 2, 3];
+    ///
+    /// // this iterator sequence is complex.
+    /// let sum = a.par_iter()
+    ///             .cloned()
+    ///             .filter(|&x| x % 2 == 0)
+    ///             .reduce(|| 0, |sum, i| sum + i);
+    ///
+    /// println!("{}", sum);
+    ///
+    /// // let's add some inspect() calls to investigate what's happening
+    /// let sum = a.par_iter()
+    ///             .cloned()
+    ///             .inspect(|x| println!("about to filter: {}", x))
+    ///             .filter(|&x| x % 2 == 0)
+    ///             .inspect(|x| println!("made it through filter: {}", x))
+    ///             .reduce(|| 0, |sum, i| sum + i);
+    ///
+    /// println!("{}", sum);
+    /// ```
+    fn inspect<OP>(self, inspect_op: OP) -> Inspect<Self, OP>
+    where
+        OP: Fn(&Self::Item) + Sync + Send,
+    {
+        Inspect::new(self, inspect_op)
+    }
+
+    /// Mutates each item of this iterator before yielding it.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let par_iter = (0..5).into_par_iter().update(|x| {*x *= 2;});
+    ///
+    /// let doubles: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn update<F>(self, update_op: F) -> Update<Self, F>
+    where
+        F: Fn(&mut Self::Item) + Sync + Send,
+    {
+        Update::new(self, update_op)
+    }
+
+    /// Applies `filter_op` to each item of this iterator, producing a new
+    /// iterator with only the items that gave `true` results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..10).into_par_iter().filter(|x| x % 2 == 0);
+    ///
+    /// let even_numbers: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&even_numbers[..], &[0, 2, 4, 6, 8]);
+    /// ```
+    fn filter<P>(self, filter_op: P) -> Filter<Self, P>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        Filter::new(self, filter_op)
+    }
+
+    /// Applies `filter_op` to each item of this iterator to get an `Option`,
+    /// producing a new iterator with only the items from `Some` results.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut par_iter = (0..10).into_par_iter()
+    ///                         .filter_map(|x| {
+    ///                             if x % 2 == 0 { Some(x * 3) }
+    ///                             else { None }
+    ///                         });
+    ///
+    /// let even_numbers: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&even_numbers[..], &[0, 6, 12, 18, 24]);
+    /// ```
+    fn filter_map<P, R>(self, filter_op: P) -> FilterMap<Self, P>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        FilterMap::new(self, filter_op)
+    }
+
+    /// Applies `map_op` to each item of this iterator to get nested parallel iterators,
+    /// producing a new parallel iterator that flattens these back into one.
+    ///
+    /// See also [`flat_map_iter`](#method.flat_map_iter).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]];
+    ///
+    /// let par_iter = a.par_iter().cloned().flat_map(|a| a.to_vec());
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]);
+    /// ```
+    fn flat_map<F, PI>(self, map_op: F) -> FlatMap<Self, F>
+    where
+        F: Fn(Self::Item) -> PI + Sync + Send,
+        PI: IntoParallelIterator,
+    {
+        FlatMap::new(self, map_op)
+    }
+
+    /// Applies `map_op` to each item of this iterator to get nested serial iterators,
+    /// producing a new parallel iterator that flattens these back into one.
+    ///
+    /// # `flat_map_iter` versus `flat_map`
+    ///
+    /// These two methods are similar but behave slightly differently. With [`flat_map`],
+    /// each of the nested iterators must be a parallel iterator, and they will be further
+    /// split up with nested parallelism. With `flat_map_iter`, each nested iterator is a
+    /// sequential `Iterator`, and we only parallelize _between_ them, while the items
+    /// produced by each nested iterator are processed sequentially.
+    ///
+    /// When choosing between these methods, consider whether nested parallelism suits the
+    /// potential iterators at hand. If there's little computation involved, or its length
+    /// is much less than the outer parallel iterator, then it may perform better to avoid
+    /// the overhead of parallelism, just flattening sequentially with `flat_map_iter`.
+    /// If there is a lot of computation, potentially outweighing the outer parallel
+    /// iterator, then the nested parallelism of `flat_map` may be worthwhile.
+    ///
+    /// [`flat_map`]: #method.flat_map
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cell::RefCell;
+    ///
+    /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]];
+    ///
+    /// let par_iter = a.par_iter().flat_map_iter(|a| {
+    ///     // The serial iterator doesn't have to be thread-safe, just its items.
+    ///     let cell_iter = RefCell::new(a.iter().cloned());
+    ///     std::iter::from_fn(move || cell_iter.borrow_mut().next())
+    /// });
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    ///
+    /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]);
+    /// ```
+    fn flat_map_iter<F, SI>(self, map_op: F) -> FlatMapIter<Self, F>
+    where
+        F: Fn(Self::Item) -> SI + Sync + Send,
+        SI: IntoIterator,
+        SI::Item: Send,
+    {
+        FlatMapIter::new(self, map_op)
+    }
+
+    /// An adaptor that flattens parallel-iterable `Item`s into one large iterator.
+    ///
+    /// See also [`flatten_iter`](#method.flatten_iter).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x: Vec<Vec<_>> = vec![vec![1, 2], vec![3, 4]];
+    /// let y: Vec<_> = x.into_par_iter().flatten().collect();
+    ///
+    /// assert_eq!(y, vec![1, 2, 3, 4]);
+    /// ```
+    fn flatten(self) -> Flatten<Self>
+    where
+        Self::Item: IntoParallelIterator,
+    {
+        Flatten::new(self)
+    }
+
+    /// An adaptor that flattens serial-iterable `Item`s into one large iterator.
+    ///
+    /// See also [`flatten`](#method.flatten) and the analagous comparison of
+    /// [`flat_map_iter` versus `flat_map`](#flat_map_iter-versus-flat_map).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x: Vec<Vec<_>> = vec![vec![1, 2], vec![3, 4]];
+    /// let iters: Vec<_> = x.into_iter().map(Vec::into_iter).collect();
+    /// let y: Vec<_> = iters.into_par_iter().flatten_iter().collect();
+    ///
+    /// assert_eq!(y, vec![1, 2, 3, 4]);
+    /// ```
+    fn flatten_iter(self) -> FlattenIter<Self>
+    where
+        Self::Item: IntoIterator,
+        <Self::Item as IntoIterator>::Item: Send,
+    {
+        FlattenIter::new(self)
+    }
+
+    /// Reduces the items in the iterator into one item using `op`.
+    /// The argument `identity` should be a closure that can produce
+    /// "identity" value which may be inserted into the sequence as
+    /// needed to create opportunities for parallel execution. So, for
+    /// example, if you are doing a summation, then `identity()` ought
+    /// to produce something that represents the zero for your type
+    /// (but consider just calling `sum()` in that case).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// // Iterate over a sequence of pairs `(x0, y0), ..., (xN, yN)`
+    /// // and use reduce to compute one pair `(x0 + ... + xN, y0 + ... + yN)`
+    /// // where the first/second elements are summed separately.
+    /// use rayon::prelude::*;
+    /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)]
+    ///            .par_iter()        // iterating over &(i32, i32)
+    ///            .cloned()          // iterating over (i32, i32)
+    ///            .reduce(|| (0, 0), // the "identity" is 0 in both columns
+    ///                    |a, b| (a.0 + b.0, a.1 + b.1));
+    /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9));
+    /// ```
+    ///
+    /// **Note:** unlike a sequential `fold` operation, the order in
+    /// which `op` will be applied to reduce the result is not fully
+    /// specified. So `op` should be [associative] or else the results
+    /// will be non-deterministic. And of course `identity()` should
+    /// produce a true identity.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    fn reduce<OP, ID>(self, identity: ID, op: OP) -> Self::Item
+    where
+        OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send,
+        ID: Fn() -> Self::Item + Sync + Send,
+    {
+        reduce::reduce(self, identity, op)
+    }
+
+    /// Reduces the items in the iterator into one item using `op`.
+    /// If the iterator is empty, `None` is returned; otherwise,
+    /// `Some` is returned.
+    ///
+    /// This version of `reduce` is simple but somewhat less
+    /// efficient. If possible, it is better to call `reduce()`, which
+    /// requires an identity element.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)]
+    ///            .par_iter()        // iterating over &(i32, i32)
+    ///            .cloned()          // iterating over (i32, i32)
+    ///            .reduce_with(|a, b| (a.0 + b.0, a.1 + b.1))
+    ///            .unwrap();
+    /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9));
+    /// ```
+    ///
+    /// **Note:** unlike a sequential `fold` operation, the order in
+    /// which `op` will be applied to reduce the result is not fully
+    /// specified. So `op` should be [associative] or else the results
+    /// will be non-deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    fn reduce_with<OP>(self, op: OP) -> Option<Self::Item>
+    where
+        OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send,
+    {
+        fn opt_fold<T>(op: impl Fn(T, T) -> T) -> impl Fn(Option<T>, T) -> Option<T> {
+            move |opt_a, b| match opt_a {
+                Some(a) => Some(op(a, b)),
+                None => Some(b),
+            }
+        }
+
+        fn opt_reduce<T>(op: impl Fn(T, T) -> T) -> impl Fn(Option<T>, Option<T>) -> Option<T> {
+            move |opt_a, opt_b| match (opt_a, opt_b) {
+                (Some(a), Some(b)) => Some(op(a, b)),
+                (Some(v), None) | (None, Some(v)) => Some(v),
+                (None, None) => None,
+            }
+        }
+
+        self.fold(<_>::default, opt_fold(&op))
+            .reduce(<_>::default, opt_reduce(&op))
+    }
+
+    /// Reduces the items in the iterator into one item using a fallible `op`.
+    /// The `identity` argument is used the same way as in [`reduce()`].
+    ///
+    /// [`reduce()`]: #method.reduce
+    ///
+    /// If a `Result::Err` or `Option::None` item is found, or if `op` reduces
+    /// to one, we will attempt to stop processing the rest of the items in the
+    /// iterator as soon as possible, and we will return that terminating value.
+    /// Otherwise, we will return the final reduced `Result::Ok(T)` or
+    /// `Option::Some(T)`.  If there are multiple errors in parallel, it is not
+    /// specified which will be returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // Compute the sum of squares, being careful about overflow.
+    /// fn sum_squares<I: IntoParallelIterator<Item = i32>>(iter: I) -> Option<i32> {
+    ///     iter.into_par_iter()
+    ///         .map(|i| i.checked_mul(i))            // square each item,
+    ///         .try_reduce(|| 0, i32::checked_add)   // and add them up!
+    /// }
+    /// assert_eq!(sum_squares(0..5), Some(0 + 1 + 4 + 9 + 16));
+    ///
+    /// // The sum might overflow
+    /// assert_eq!(sum_squares(0..10_000), None);
+    ///
+    /// // Or the squares might overflow before it even reaches `try_reduce`
+    /// assert_eq!(sum_squares(1_000_000..1_000_001), None);
+    /// ```
+    fn try_reduce<T, OP, ID>(self, identity: ID, op: OP) -> Self::Item
+    where
+        OP: Fn(T, T) -> Self::Item + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        Self::Item: Try<Ok = T>,
+    {
+        try_reduce::try_reduce(self, identity, op)
+    }
+
+    /// Reduces the items in the iterator into one item using a fallible `op`.
+    ///
+    /// Like [`reduce_with()`], if the iterator is empty, `None` is returned;
+    /// otherwise, `Some` is returned.  Beyond that, it behaves like
+    /// [`try_reduce()`] for handling `Err`/`None`.
+    ///
+    /// [`reduce_with()`]: #method.reduce_with
+    /// [`try_reduce()`]: #method.try_reduce
+    ///
+    /// For instance, with `Option` items, the return value may be:
+    /// - `None`, the iterator was empty
+    /// - `Some(None)`, we stopped after encountering `None`.
+    /// - `Some(Some(x))`, the entire iterator reduced to `x`.
+    ///
+    /// With `Result` items, the nesting is more obvious:
+    /// - `None`, the iterator was empty
+    /// - `Some(Err(e))`, we stopped after encountering an error `e`.
+    /// - `Some(Ok(x))`, the entire iterator reduced to `x`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let files = ["/dev/null", "/does/not/exist"];
+    ///
+    /// // Find the biggest file
+    /// files.into_par_iter()
+    ///     .map(|path| std::fs::metadata(path).map(|m| (path, m.len())))
+    ///     .try_reduce_with(|a, b| {
+    ///         Ok(if a.1 >= b.1 { a } else { b })
+    ///     })
+    ///     .expect("Some value, since the iterator is not empty")
+    ///     .expect_err("not found");
+    /// ```
+    fn try_reduce_with<T, OP>(self, op: OP) -> Option<Self::Item>
+    where
+        OP: Fn(T, T) -> Self::Item + Sync + Send,
+        Self::Item: Try<Ok = T>,
+    {
+        try_reduce_with::try_reduce_with(self, op)
+    }
+
+    /// Parallel fold is similar to sequential fold except that the
+    /// sequence of items may be subdivided before it is
+    /// folded. Consider a list of numbers like `22 3 77 89 46`. If
+    /// you used sequential fold to add them (`fold(0, |a,b| a+b)`,
+    /// you would wind up first adding 0 + 22, then 22 + 3, then 25 +
+    /// 77, and so forth. The **parallel fold** works similarly except
+    /// that it first breaks up your list into sublists, and hence
+    /// instead of yielding up a single sum at the end, it yields up
+    /// multiple sums. The number of results is nondeterministic, as
+    /// is the point where the breaks occur.
+    ///
+    /// So if did the same parallel fold (`fold(0, |a,b| a+b)`) on
+    /// our example list, we might wind up with a sequence of two numbers,
+    /// like so:
+    ///
+    /// ```notrust
+    /// 22 3 77 89 46
+    ///       |     |
+    ///     102   135
+    /// ```
+    ///
+    /// Or perhaps these three numbers:
+    ///
+    /// ```notrust
+    /// 22 3 77 89 46
+    ///       |  |  |
+    ///     102 89 46
+    /// ```
+    ///
+    /// In general, Rayon will attempt to find good breaking points
+    /// that keep all of your cores busy.
+    ///
+    /// ### Fold versus reduce
+    ///
+    /// The `fold()` and `reduce()` methods each take an identity element
+    /// and a combining function, but they operate rather differently.
+    ///
+    /// `reduce()` requires that the identity function has the same
+    /// type as the things you are iterating over, and it fully
+    /// reduces the list of items into a single item. So, for example,
+    /// imagine we are iterating over a list of bytes `bytes: [128_u8,
+    /// 64_u8, 64_u8]`. If we used `bytes.reduce(|| 0_u8, |a: u8, b:
+    /// u8| a + b)`, we would get an overflow. This is because `0`,
+    /// `a`, and `b` here are all bytes, just like the numbers in the
+    /// list (I wrote the types explicitly above, but those are the
+    /// only types you can use). To avoid the overflow, we would need
+    /// to do something like `bytes.map(|b| b as u32).reduce(|| 0, |a,
+    /// b| a + b)`, in which case our result would be `256`.
+    ///
+    /// In contrast, with `fold()`, the identity function does not
+    /// have to have the same type as the things you are iterating
+    /// over, and you potentially get back many results. So, if we
+    /// continue with the `bytes` example from the previous paragraph,
+    /// we could do `bytes.fold(|| 0_u32, |a, b| a + (b as u32))` to
+    /// convert our bytes into `u32`. And of course we might not get
+    /// back a single sum.
+    ///
+    /// There is a more subtle distinction as well, though it's
+    /// actually implied by the above points. When you use `reduce()`,
+    /// your reduction function is sometimes called with values that
+    /// were never part of your original parallel iterator (for
+    /// example, both the left and right might be a partial sum). With
+    /// `fold()`, in contrast, the left value in the fold function is
+    /// always the accumulator, and the right value is always from
+    /// your original sequence.
+    ///
+    /// ### Fold vs Map/Reduce
+    ///
+    /// Fold makes sense if you have some operation where it is
+    /// cheaper to create groups of elements at a time. For example,
+    /// imagine collecting characters into a string. If you were going
+    /// to use map/reduce, you might try this:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let s =
+    ///     ['a', 'b', 'c', 'd', 'e']
+    ///     .par_iter()
+    ///     .map(|c: &char| format!("{}", c))
+    ///     .reduce(|| String::new(),
+    ///             |mut a: String, b: String| { a.push_str(&b); a });
+    ///
+    /// assert_eq!(s, "abcde");
+    /// ```
+    ///
+    /// Because reduce produces the same type of element as its input,
+    /// you have to first map each character into a string, and then
+    /// you can reduce them. This means we create one string per
+    /// element in our iterator -- not so great. Using `fold`, we can
+    /// do this instead:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let s =
+    ///     ['a', 'b', 'c', 'd', 'e']
+    ///     .par_iter()
+    ///     .fold(|| String::new(),
+    ///             |mut s: String, c: &char| { s.push(*c); s })
+    ///     .reduce(|| String::new(),
+    ///             |mut a: String, b: String| { a.push_str(&b); a });
+    ///
+    /// assert_eq!(s, "abcde");
+    /// ```
+    ///
+    /// Now `fold` will process groups of our characters at a time,
+    /// and we only make one string per group. We should wind up with
+    /// some small-ish number of strings roughly proportional to the
+    /// number of CPUs you have (it will ultimately depend on how busy
+    /// your processors are). Note that we still need to do a reduce
+    /// afterwards to combine those groups of strings into a single
+    /// string.
+    ///
+    /// You could use a similar trick to save partial results (e.g., a
+    /// cache) or something similar.
+    ///
+    /// ### Combining fold with other operations
+    ///
+    /// You can combine `fold` with `reduce` if you want to produce a
+    /// single value. This is then roughly equivalent to a map/reduce
+    /// combination in effect:
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .fold(|| 0_u32, |a: u32, b: u8| a + (b as u32))
+    ///                .sum::<u32>();
+    ///
+    /// assert_eq!(sum, (0..22).sum()); // compare to sequential
+    /// ```
+    fn fold<T, ID, F>(self, identity: ID, fold_op: F) -> Fold<Self, ID, F>
+    where
+        F: Fn(T, Self::Item) -> T + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        T: Send,
+    {
+        Fold::new(self, identity, fold_op)
+    }
+
+    /// Applies `fold_op` to the given `init` value with each item of this
+    /// iterator, finally producing the value for further use.
+    ///
+    /// This works essentially like `fold(|| init.clone(), fold_op)`, except
+    /// it doesn't require the `init` type to be `Sync`, nor any other form
+    /// of added synchronization.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .fold_with(0_u32, |a: u32, b: u8| a + (b as u32))
+    ///                .sum::<u32>();
+    ///
+    /// assert_eq!(sum, (0..22).sum()); // compare to sequential
+    /// ```
+    fn fold_with<F, T>(self, init: T, fold_op: F) -> FoldWith<Self, T, F>
+    where
+        F: Fn(T, Self::Item) -> T + Sync + Send,
+        T: Send + Clone,
+    {
+        FoldWith::new(self, init, fold_op)
+    }
+
+    /// Performs a fallible parallel fold.
+    ///
+    /// This is a variation of [`fold()`] for operations which can fail with
+    /// `Option::None` or `Result::Err`.  The first such failure stops
+    /// processing the local set of items, without affecting other folds in the
+    /// iterator's subdivisions.
+    ///
+    /// Often, `try_fold()` will be followed by [`try_reduce()`]
+    /// for a final reduction and global short-circuiting effect.
+    ///
+    /// [`fold()`]: #method.fold
+    /// [`try_reduce()`]: #method.try_reduce
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .try_fold(|| 0_u32, |a: u32, b: u8| a.checked_add(b as u32))
+    ///                .try_reduce(|| 0, u32::checked_add);
+    ///
+    /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential
+    /// ```
+    fn try_fold<T, R, ID, F>(self, identity: ID, fold_op: F) -> TryFold<Self, R, ID, F>
+    where
+        F: Fn(T, Self::Item) -> R + Sync + Send,
+        ID: Fn() -> T + Sync + Send,
+        R: Try<Ok = T> + Send,
+    {
+        TryFold::new(self, identity, fold_op)
+    }
+
+    /// Performs a fallible parallel fold with a cloneable `init` value.
+    ///
+    /// This combines the `init` semantics of [`fold_with()`] and the failure
+    /// semantics of [`try_fold()`].
+    ///
+    /// [`fold_with()`]: #method.fold_with
+    /// [`try_fold()`]: #method.try_fold
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let bytes = 0..22_u8;
+    /// let sum = bytes.into_par_iter()
+    ///                .try_fold_with(0_u32, |a: u32, b: u8| a.checked_add(b as u32))
+    ///                .try_reduce(|| 0, u32::checked_add);
+    ///
+    /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential
+    /// ```
+    fn try_fold_with<F, T, R>(self, init: T, fold_op: F) -> TryFoldWith<Self, R, F>
+    where
+        F: Fn(T, Self::Item) -> R + Sync + Send,
+        R: Try<Ok = T> + Send,
+        T: Clone + Send,
+    {
+        TryFoldWith::new(self, init, fold_op)
+    }
+
+    /// Sums up the items in the iterator.
+    ///
+    /// Note that the order in items will be reduced is not specified,
+    /// so if the `+` operator is not truly [associative] \(as is the
+    /// case for floating point numbers), then the results are not
+    /// fully deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    ///
+    /// Basically equivalent to `self.reduce(|| 0, |a, b| a + b)`,
+    /// except that the type of `0` and the `+` operation may vary
+    /// depending on the type of value being produced.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 5, 7];
+    ///
+    /// let sum: i32 = a.par_iter().sum();
+    ///
+    /// assert_eq!(sum, 13);
+    /// ```
+    fn sum<S>(self) -> S
+    where
+        S: Send + Sum<Self::Item> + Sum<S>,
+    {
+        sum::sum(self)
+    }
+
+    /// Multiplies all the items in the iterator.
+    ///
+    /// Note that the order in items will be reduced is not specified,
+    /// so if the `*` operator is not truly [associative] \(as is the
+    /// case for floating point numbers), then the results are not
+    /// fully deterministic.
+    ///
+    /// [associative]: https://en.wikipedia.org/wiki/Associative_property
+    ///
+    /// Basically equivalent to `self.reduce(|| 1, |a, b| a * b)`,
+    /// except that the type of `1` and the `*` operation may vary
+    /// depending on the type of value being produced.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// fn factorial(n: u32) -> u32 {
+    ///    (1..n+1).into_par_iter().product()
+    /// }
+    ///
+    /// assert_eq!(factorial(0), 1);
+    /// assert_eq!(factorial(1), 1);
+    /// assert_eq!(factorial(5), 120);
+    /// ```
+    fn product<P>(self) -> P
+    where
+        P: Send + Product<Self::Item> + Product<P>,
+    {
+        product::product(self)
+    }
+
+    /// Computes the minimum of all the items in the iterator. If the
+    /// iterator is empty, `None` is returned; otherwise, `Some(min)`
+    /// is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// Basically equivalent to `self.reduce_with(|a, b| cmp::min(a, b))`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [45, 74, 32];
+    ///
+    /// assert_eq!(a.par_iter().min(), Some(&32));
+    ///
+    /// let b: [i32; 0] = [];
+    ///
+    /// assert_eq!(b.par_iter().min(), None);
+    /// ```
+    fn min(self) -> Option<Self::Item>
+    where
+        Self::Item: Ord,
+    {
+        self.reduce_with(cmp::min)
+    }
+
+    /// Computes the minimum of all the items in the iterator with respect to
+    /// the given comparison function. If the iterator is empty, `None` is
+    /// returned; otherwise, `Some(min)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the comparison function is not associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 77, 53, 240, -1];
+    ///
+    /// assert_eq!(a.par_iter().min_by(|x, y| x.cmp(y)), Some(&-3));
+    /// ```
+    fn min_by<F>(self, f: F) -> Option<Self::Item>
+    where
+        F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering,
+    {
+        fn min<T>(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T {
+            move |a, b| match f(&a, &b) {
+                Ordering::Greater => b,
+                _ => a,
+            }
+        }
+
+        self.reduce_with(min(f))
+    }
+
+    /// Computes the item that yields the minimum value for the given
+    /// function. If the iterator is empty, `None` is returned;
+    /// otherwise, `Some(item)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 34, 2, 5, -10, -3, -23];
+    ///
+    /// assert_eq!(a.par_iter().min_by_key(|x| x.abs()), Some(&2));
+    /// ```
+    fn min_by_key<K, F>(self, f: F) -> Option<Self::Item>
+    where
+        K: Ord + Send,
+        F: Sync + Send + Fn(&Self::Item) -> K,
+    {
+        fn key<T, K>(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) {
+            move |x| (f(&x), x)
+        }
+
+        fn min_key<T, K: Ord>(a: (K, T), b: (K, T)) -> (K, T) {
+            match (a.0).cmp(&b.0) {
+                Ordering::Greater => b,
+                _ => a,
+            }
+        }
+
+        let (_, x) = self.map(key(f)).reduce_with(min_key)?;
+        Some(x)
+    }
+
+    /// Computes the maximum of all the items in the iterator. If the
+    /// iterator is empty, `None` is returned; otherwise, `Some(max)`
+    /// is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// Basically equivalent to `self.reduce_with(|a, b| cmp::max(a, b))`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [45, 74, 32];
+    ///
+    /// assert_eq!(a.par_iter().max(), Some(&74));
+    ///
+    /// let b: [i32; 0] = [];
+    ///
+    /// assert_eq!(b.par_iter().max(), None);
+    /// ```
+    fn max(self) -> Option<Self::Item>
+    where
+        Self::Item: Ord,
+    {
+        self.reduce_with(cmp::max)
+    }
+
+    /// Computes the maximum of all the items in the iterator with respect to
+    /// the given comparison function. If the iterator is empty, `None` is
+    /// returned; otherwise, `Some(min)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the comparison function is not associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 77, 53, 240, -1];
+    ///
+    /// assert_eq!(a.par_iter().max_by(|x, y| x.abs().cmp(&y.abs())), Some(&240));
+    /// ```
+    fn max_by<F>(self, f: F) -> Option<Self::Item>
+    where
+        F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering,
+    {
+        fn max<T>(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T {
+            move |a, b| match f(&a, &b) {
+                Ordering::Greater => a,
+                _ => b,
+            }
+        }
+
+        self.reduce_with(max(f))
+    }
+
+    /// Computes the item that yields the maximum value for the given
+    /// function. If the iterator is empty, `None` is returned;
+    /// otherwise, `Some(item)` is returned.
+    ///
+    /// Note that the order in which the items will be reduced is not
+    /// specified, so if the `Ord` impl is not truly associative, then
+    /// the results are not deterministic.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [-3_i32, 34, 2, 5, -10, -3, -23];
+    ///
+    /// assert_eq!(a.par_iter().max_by_key(|x| x.abs()), Some(&34));
+    /// ```
+    fn max_by_key<K, F>(self, f: F) -> Option<Self::Item>
+    where
+        K: Ord + Send,
+        F: Sync + Send + Fn(&Self::Item) -> K,
+    {
+        fn key<T, K>(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) {
+            move |x| (f(&x), x)
+        }
+
+        fn max_key<T, K: Ord>(a: (K, T), b: (K, T)) -> (K, T) {
+            match (a.0).cmp(&b.0) {
+                Ordering::Greater => a,
+                _ => b,
+            }
+        }
+
+        let (_, x) = self.map(key(f)).reduce_with(max_key)?;
+        Some(x)
+    }
+
+    /// Takes two iterators and creates a new iterator over both.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 1, 2];
+    /// let b = [9, 8, 7];
+    ///
+    /// let par_iter = a.par_iter().chain(b.par_iter());
+    ///
+    /// let chained: Vec<_> = par_iter.cloned().collect();
+    ///
+    /// assert_eq!(&chained[..], &[0, 1, 2, 9, 8, 7]);
+    /// ```
+    fn chain<C>(self, chain: C) -> Chain<Self, C::Iter>
+    where
+        C: IntoParallelIterator<Item = Self::Item>,
+    {
+        Chain::new(self, chain.into_par_iter())
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate and returns it. This operation
+    /// is similar to [`find` on sequential iterators][find] but
+    /// the item returned may not be the **first** one in the parallel
+    /// sequence which matches, since we search the entire sequence in parallel.
+    ///
+    /// Once a match is found, we will attempt to stop processing
+    /// the rest of the items in the iterator as soon as possible
+    /// (just as `find` stops iterating once a match is found).
+    ///
+    /// [find]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.find
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_any(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_any(|&&x| x == 100), None);
+    /// ```
+    fn find_any<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find::find(self, predicate)
+    }
+
+    /// Searches for the sequentially **first** item in the parallel iterator
+    /// that matches the given predicate and returns it.
+    ///
+    /// Once a match is found, all attempts to the right of the match
+    /// will be stopped, while attempts to the left must continue in case
+    /// an earlier match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous.  If you
+    /// just want the first match that discovered anywhere in the iterator,
+    /// `find_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_first(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_first(|&&x| x == 100), None);
+    /// ```
+    fn find_first<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find_first_last::find_first(self, predicate)
+    }
+
+    /// Searches for the sequentially **last** item in the parallel iterator
+    /// that matches the given predicate and returns it.
+    ///
+    /// Once a match is found, all attempts to the left of the match
+    /// will be stopped, while attempts to the right must continue in case
+    /// a later match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "last" may be nebulous.  When the
+    /// order doesn't actually matter to you, `find_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().find_last(|&&x| x == 3), Some(&3));
+    ///
+    /// assert_eq!(a.par_iter().find_last(|&&x| x == 100), None);
+    /// ```
+    fn find_last<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        find_first_last::find_last(self, predicate)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator
+    /// and returns **any** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, we will
+    /// attempt to stop processing the rest of the items in the iterator
+    /// as soon as possible.
+    ///
+    /// Note that this method only returns **some** item in the parallel
+    /// iterator that is not None from the map predicate. The item returned
+    /// may not be the **first** non-None value produced in the parallel
+    /// sequence, since the entire sequence is mapped over in parallel.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "5", "5"];
+    ///
+    /// let found_number = c.par_iter().find_map_any(|s| s.parse().ok());
+    ///
+    /// assert_eq!(found_number, Some(5));
+    /// ```
+    fn find_map_any<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_any(yes)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator and
+    /// returns the sequentially **first** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, all attempts
+    /// to the right of the match will be stopped, while attempts to the left
+    /// must continue in case an earlier match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous. If you
+    /// just want the first non-None value discovered anywhere in the iterator,
+    /// `find_map_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "2", "5"];
+    ///
+    /// let first_number = c.par_iter().find_map_first(|s| s.parse().ok());
+    ///
+    /// assert_eq!(first_number, Some(2));
+    /// ```
+    fn find_map_first<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_first(yes)
+    }
+
+    /// Applies the given predicate to the items in the parallel iterator and
+    /// returns the sequentially **last** non-None result of the map operation.
+    ///
+    /// Once a non-None value is produced from the map operation, all attempts
+    /// to the left of the match will be stopped, while attempts to the right
+    /// must continue in case a later match is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous. If you
+    /// just want the first non-None value discovered anywhere in the iterator,
+    /// `find_map_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let c = ["lol", "NaN", "2", "5"];
+    ///
+    /// let last_number = c.par_iter().find_map_last(|s| s.parse().ok());
+    ///
+    /// assert_eq!(last_number, Some(5));
+    /// ```
+    fn find_map_last<P, R>(self, predicate: P) -> Option<R>
+    where
+        P: Fn(Self::Item) -> Option<R> + Sync + Send,
+        R: Send,
+    {
+        fn yes<T>(_: &T) -> bool {
+            true
+        }
+        self.filter_map(predicate).find_last(yes)
+    }
+
+    #[doc(hidden)]
+    #[deprecated(note = "parallel `find` does not search in order -- use `find_any`, \\
+                         `find_first`, or `find_last`")]
+    fn find<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        self.find_any(predicate)
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate, and if so returns true.  Once
+    /// a match is found, we'll attempt to stop process the rest
+    /// of the items.  Proving that there's no match, returning false,
+    /// does require visiting every item.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 12, 3, 4, 0, 23, 0];
+    ///
+    /// let is_valid = a.par_iter().any(|&x| x > 10);
+    ///
+    /// assert!(is_valid);
+    /// ```
+    fn any<P>(self, predicate: P) -> bool
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        self.map(predicate).find_any(bool::clone).is_some()
+    }
+
+    /// Tests that every item in the parallel iterator matches the given
+    /// predicate, and if so returns true.  If a counter-example is found,
+    /// we'll attempt to stop processing more items, then return false.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [0, 12, 3, 4, 0, 23, 0];
+    ///
+    /// let is_valid = a.par_iter().all(|&x| x > 10);
+    ///
+    /// assert!(!is_valid);
+    /// ```
+    fn all<P>(self, predicate: P) -> bool
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn is_false(x: &bool) -> bool {
+            !x
+        }
+
+        self.map(predicate).find_any(is_false).is_none()
+    }
+
+    /// Creates an iterator over the `Some` items of this iterator, halting
+    /// as soon as any `None` is found.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::sync::atomic::{AtomicUsize, Ordering};
+    ///
+    /// let counter = AtomicUsize::new(0);
+    /// let value = (0_i32..2048)
+    ///     .into_par_iter()
+    ///     .map(|x| {
+    ///              counter.fetch_add(1, Ordering::SeqCst);
+    ///              if x < 1024 { Some(x) } else { None }
+    ///          })
+    ///     .while_some()
+    ///     .max();
+    ///
+    /// assert!(value < Some(1024));
+    /// assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+    /// ```
+    fn while_some<T>(self) -> WhileSome<Self>
+    where
+        Self: ParallelIterator<Item = Option<T>>,
+        T: Send,
+    {
+        WhileSome::new(self)
+    }
+
+    /// Wraps an iterator with a fuse in case of panics, to halt all threads
+    /// as soon as possible.
+    ///
+    /// Panics within parallel iterators are always propagated to the caller,
+    /// but they don't always halt the rest of the iterator right away, due to
+    /// the internal semantics of [`join`]. This adaptor makes a greater effort
+    /// to stop processing other items sooner, with the cost of additional
+    /// synchronization overhead, which may also inhibit some optimizations.
+    ///
+    /// [`join`]: ../fn.join.html#panics
+    ///
+    /// # Examples
+    ///
+    /// If this code didn't use `panic_fuse()`, it would continue processing
+    /// many more items in other threads (with long sleep delays) before the
+    /// panic is finally propagated.
+    ///
+    /// ```should_panic
+    /// use rayon::prelude::*;
+    /// use std::{thread, time};
+    ///
+    /// (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .panic_fuse()
+    ///     .for_each(|i| {
+    ///         // simulate some work
+    ///         thread::sleep(time::Duration::from_secs(1));
+    ///         assert!(i > 0); // oops!
+    ///     });
+    /// ```
+    fn panic_fuse(self) -> PanicFuse<Self> {
+        PanicFuse::new(self)
+    }
+
+    /// Creates a fresh collection containing all the elements produced
+    /// by this parallel iterator.
+    ///
+    /// You may prefer [`collect_into_vec()`] implemented on
+    /// [`IndexedParallelIterator`], if your underlying iterator also implements
+    /// it. [`collect_into_vec()`] allocates efficiently with precise knowledge
+    /// of how many elements the iterator contains, and even allows you to reuse
+    /// an existing vector's backing store rather than allocating a fresh vector.
+    ///
+    /// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+    /// [`collect_into_vec()`]:
+    ///     trait.IndexedParallelIterator.html#method.collect_into_vec
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let sync_vec: Vec<_> = (0..100).into_iter().collect();
+    ///
+    /// let async_vec: Vec<_> = (0..100).into_par_iter().collect();
+    ///
+    /// assert_eq!(sync_vec, async_vec);
+    /// ```
+    fn collect<C>(self) -> C
+    where
+        C: FromParallelIterator<Self::Item>,
+    {
+        C::from_par_iter(self)
+    }
+
+    /// Unzips the items of a parallel iterator into a pair of arbitrary
+    /// `ParallelExtend` containers.
+    ///
+    /// You may prefer to use `unzip_into_vecs()`, which allocates more
+    /// efficiently with precise knowledge of how many elements the
+    /// iterator contains, and even allows you to reuse existing
+    /// vectors' backing stores rather than allocating fresh vectors.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [(0, 1), (1, 2), (2, 3), (3, 4)];
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = a.par_iter().cloned().unzip();
+    ///
+    /// assert_eq!(left, [0, 1, 2, 3]);
+    /// assert_eq!(right, [1, 2, 3, 4]);
+    /// ```
+    ///
+    /// Nested pairs can be unzipped too.
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let (values, (squares, cubes)): (Vec<_>, (Vec<_>, Vec<_>)) = (0..4).into_par_iter()
+    ///     .map(|i| (i, (i * i, i * i * i)))
+    ///     .unzip();
+    ///
+    /// assert_eq!(values, [0, 1, 2, 3]);
+    /// assert_eq!(squares, [0, 1, 4, 9]);
+    /// assert_eq!(cubes, [0, 1, 8, 27]);
+    /// ```
+    fn unzip<A, B, FromA, FromB>(self) -> (FromA, FromB)
+    where
+        Self: ParallelIterator<Item = (A, B)>,
+        FromA: Default + Send + ParallelExtend<A>,
+        FromB: Default + Send + ParallelExtend<B>,
+        A: Send,
+        B: Send,
+    {
+        unzip::unzip(self)
+    }
+
+    /// Partitions the items of a parallel iterator into a pair of arbitrary
+    /// `ParallelExtend` containers.  Items for which the `predicate` returns
+    /// true go into the first container, and the rest go into the second.
+    ///
+    /// Note: unlike the standard `Iterator::partition`, this allows distinct
+    /// collection types for the left and right items.  This is more flexible,
+    /// but may require new type annotations when converting sequential code
+    /// that used type inferrence assuming the two were the same.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter().partition(|x| x % 2 == 0);
+    ///
+    /// assert_eq!(left, [0, 2, 4, 6]);
+    /// assert_eq!(right, [1, 3, 5, 7]);
+    /// ```
+    fn partition<A, B, P>(self, predicate: P) -> (A, B)
+    where
+        A: Default + Send + ParallelExtend<Self::Item>,
+        B: Default + Send + ParallelExtend<Self::Item>,
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        unzip::partition(self, predicate)
+    }
+
+    /// Partitions and maps the items of a parallel iterator into a pair of
+    /// arbitrary `ParallelExtend` containers.  `Either::Left` items go into
+    /// the first container, and `Either::Right` items go into the second.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use rayon::iter::Either;
+    ///
+    /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter()
+    ///     .partition_map(|x| {
+    ///         if x % 2 == 0 {
+    ///             Either::Left(x * 4)
+    ///         } else {
+    ///             Either::Right(x * 3)
+    ///         }
+    ///     });
+    ///
+    /// assert_eq!(left, [0, 8, 16, 24]);
+    /// assert_eq!(right, [3, 9, 15, 21]);
+    /// ```
+    ///
+    /// Nested `Either` enums can be split as well.
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use rayon::iter::Either::*;
+    ///
+    /// let ((fizzbuzz, fizz), (buzz, other)): ((Vec<_>, Vec<_>), (Vec<_>, Vec<_>)) = (1..20)
+    ///     .into_par_iter()
+    ///     .partition_map(|x| match (x % 3, x % 5) {
+    ///         (0, 0) => Left(Left(x)),
+    ///         (0, _) => Left(Right(x)),
+    ///         (_, 0) => Right(Left(x)),
+    ///         (_, _) => Right(Right(x)),
+    ///     });
+    ///
+    /// assert_eq!(fizzbuzz, [15]);
+    /// assert_eq!(fizz, [3, 6, 9, 12, 18]);
+    /// assert_eq!(buzz, [5, 10]);
+    /// assert_eq!(other, [1, 2, 4, 7, 8, 11, 13, 14, 16, 17, 19]);
+    /// ```
+    fn partition_map<A, B, P, L, R>(self, predicate: P) -> (A, B)
+    where
+        A: Default + Send + ParallelExtend<L>,
+        B: Default + Send + ParallelExtend<R>,
+        P: Fn(Self::Item) -> Either<L, R> + Sync + Send,
+        L: Send,
+        R: Send,
+    {
+        unzip::partition_map(self, predicate)
+    }
+
+    /// Intersperses clones of an element between items of this iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// let r: Vec<_> = x.into_par_iter().intersperse(-1).collect();
+    ///
+    /// assert_eq!(r, vec![1, -1, 2, -1, 3]);
+    /// ```
+    fn intersperse(self, element: Self::Item) -> Intersperse<Self>
+    where
+        Self::Item: Clone,
+    {
+        Intersperse::new(self, element)
+    }
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method causes the iterator `self` to start producing
+    /// items and to feed them to the consumer `consumer` one by one.
+    /// It may split the consumer before doing so to create the
+    /// opportunity to produce in parallel.
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// Returns the number of items produced by this iterator, if known
+    /// statically. This can be used by consumers to trigger special fast
+    /// paths. Therefore, if `Some(_)` is returned, this iterator must only
+    /// use the (indexed) `Consumer` methods when driving a consumer, such
+    /// as `split_at()`. Calling `UnindexedConsumer::split_off_left()` or
+    /// other `UnindexedConsumer` methods -- or returning an inaccurate
+    /// value -- may result in panics.
+    ///
+    /// This method is currently used to optimize `collect` for want
+    /// of true Rust specialization; it may be removed when
+    /// specialization is stable.
+    fn opt_len(&self) -> Option<usize> {
+        None
+    }
+}
+
+impl<T: ParallelIterator> IntoParallelIterator for T {
+    type Iter = T;
+    type Item = T::Item;
+
+    fn into_par_iter(self) -> T {
+        self
+    }
+}
+
+/// An iterator that supports "random access" to its data, meaning
+/// that you can split it at arbitrary indices and draw data from
+/// those points.
+///
+/// **Note:** Not implemented for `u64`, `i64`, `u128`, or `i128` ranges
+pub trait IndexedParallelIterator: ParallelIterator {
+    /// Collects the results of the iterator into the specified
+    /// vector. The vector is always truncated before execution
+    /// begins. If possible, reusing the vector across calls can lead
+    /// to better performance since it reuses the same backing buffer.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // any prior data will be truncated
+    /// let mut vec = vec![-1, -2, -3];
+    ///
+    /// (0..5).into_par_iter()
+    ///     .collect_into_vec(&mut vec);
+    ///
+    /// assert_eq!(vec, [0, 1, 2, 3, 4]);
+    /// ```
+    fn collect_into_vec(self, target: &mut Vec<Self::Item>) {
+        collect::collect_into_vec(self, target);
+    }
+
+    /// Unzips the results of the iterator into the specified
+    /// vectors. The vectors are always truncated before execution
+    /// begins. If possible, reusing the vectors across calls can lead
+    /// to better performance since they reuse the same backing buffer.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// // any prior data will be truncated
+    /// let mut left = vec![42; 10];
+    /// let mut right = vec![-1; 10];
+    ///
+    /// (10..15).into_par_iter()
+    ///     .enumerate()
+    ///     .unzip_into_vecs(&mut left, &mut right);
+    ///
+    /// assert_eq!(left, [0, 1, 2, 3, 4]);
+    /// assert_eq!(right, [10, 11, 12, 13, 14]);
+    /// ```
+    fn unzip_into_vecs<A, B>(self, left: &mut Vec<A>, right: &mut Vec<B>)
+    where
+        Self: IndexedParallelIterator<Item = (A, B)>,
+        A: Send,
+        B: Send,
+    {
+        collect::unzip_into_vecs(self, left, right);
+    }
+
+    /// Iterates over tuples `(A, B)`, where the items `A` are from
+    /// this iterator and `B` are from the iterator given as argument.
+    /// Like the `zip` method on ordinary iterators, if the two
+    /// iterators are of unequal length, you only get the items they
+    /// have in common.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (1..4)
+    ///     .into_par_iter()
+    ///     .zip(vec!['a', 'b', 'c'])
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [(1, 'a'), (2, 'b'), (3, 'c')]);
+    /// ```
+    fn zip<Z>(self, zip_op: Z) -> Zip<Self, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        Zip::new(self, zip_op.into_par_iter())
+    }
+
+    /// The same as `Zip`, but requires that both iterators have the same length.
+    ///
+    /// # Panics
+    /// Will panic if `self` and `zip_op` are not the same length.
+    ///
+    /// ```should_panic
+    /// use rayon::prelude::*;
+    ///
+    /// let one = [1u8];
+    /// let two = [2u8, 2];
+    /// let one_iter = one.par_iter();
+    /// let two_iter = two.par_iter();
+    ///
+    /// // this will panic
+    /// let zipped: Vec<(&u8, &u8)> = one_iter.zip_eq(two_iter).collect();
+    ///
+    /// // we should never get here
+    /// assert_eq!(1, zipped.len());
+    /// ```
+    fn zip_eq<Z>(self, zip_op: Z) -> ZipEq<Self, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        let zip_op_iter = zip_op.into_par_iter();
+        assert_eq!(self.len(), zip_op_iter.len());
+        ZipEq::new(self, zip_op_iter)
+    }
+
+    /// Interleaves elements of this iterator and the other given
+    /// iterator. Alternately yields elements from this iterator and
+    /// the given iterator, until both are exhausted. If one iterator
+    /// is exhausted before the other, the last elements are provided
+    /// from the other.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let (x, y) = (vec![1, 2], vec![3, 4, 5, 6]);
+    /// let r: Vec<i32> = x.into_par_iter().interleave(y).collect();
+    /// assert_eq!(r, vec![1, 3, 2, 4, 5, 6]);
+    /// ```
+    fn interleave<I>(self, other: I) -> Interleave<Self, I::Iter>
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator<Item = Self::Item>,
+    {
+        Interleave::new(self, other.into_par_iter())
+    }
+
+    /// Interleaves elements of this iterator and the other given
+    /// iterator, until one is exhausted.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let (x, y) = (vec![1, 2, 3, 4], vec![5, 6]);
+    /// let r: Vec<i32> = x.into_par_iter().interleave_shortest(y).collect();
+    /// assert_eq!(r, vec![1, 5, 2, 6, 3]);
+    /// ```
+    fn interleave_shortest<I>(self, other: I) -> InterleaveShortest<Self, I::Iter>
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator<Item = Self::Item>,
+    {
+        InterleaveShortest::new(self, other.into_par_iter())
+    }
+
+    /// Splits an iterator up into fixed-size chunks.
+    ///
+    /// Returns an iterator that returns `Vec`s of the given number of elements.
+    /// If the number of elements in the iterator is not divisible by `chunk_size`,
+    /// the last chunk may be shorter than `chunk_size`.
+    ///
+    /// See also [`par_chunks()`] and [`par_chunks_mut()`] for similar behavior on
+    /// slices, without having to allocate intermediate `Vec`s for the chunks.
+    ///
+    /// [`par_chunks()`]: ../slice/trait.ParallelSlice.html#method.par_chunks
+    /// [`par_chunks_mut()`]: ../slice/trait.ParallelSliceMut.html#method.par_chunks_mut
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    /// let r: Vec<Vec<i32>> = a.into_par_iter().chunks(3).collect();
+    /// assert_eq!(r, vec![vec![1,2,3], vec![4,5,6], vec![7,8,9], vec![10]]);
+    /// ```
+    fn chunks(self, chunk_size: usize) -> Chunks<Self> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        Chunks::new(self, chunk_size)
+    }
+
+    /// Lexicographically compares the elements of this `ParallelIterator` with those of
+    /// another.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cmp::Ordering::*;
+    ///
+    /// let x = vec![1, 2, 3];
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 3, 0]), Less);
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 2, 3]), Equal);
+    /// assert_eq!(x.par_iter().cmp(&vec![1, 2]), Greater);
+    /// ```
+    fn cmp<I>(self, other: I) -> Ordering
+    where
+        I: IntoParallelIterator<Item = Self::Item>,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: Ord,
+    {
+        #[inline]
+        fn ordering<T: Ord>((x, y): (T, T)) -> Ordering {
+            Ord::cmp(&x, &y)
+        }
+
+        #[inline]
+        fn inequal(&ord: &Ordering) -> bool {
+            ord != Ordering::Equal
+        }
+
+        let other = other.into_par_iter();
+        let ord_len = self.len().cmp(&other.len());
+        self.zip(other)
+            .map(ordering)
+            .find_first(inequal)
+            .unwrap_or(ord_len)
+    }
+
+    /// Lexicographically compares the elements of this `ParallelIterator` with those of
+    /// another.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::cmp::Ordering::*;
+    /// use std::f64::NAN;
+    ///
+    /// let x = vec![1.0, 2.0, 3.0];
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 3.0, 0.0]), Some(Less));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0, 3.0]), Some(Equal));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0]), Some(Greater));
+    /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, NAN]), None);
+    /// ```
+    fn partial_cmp<I>(self, other: I) -> Option<Ordering>
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        #[inline]
+        fn ordering<T: PartialOrd<U>, U>((x, y): (T, U)) -> Option<Ordering> {
+            PartialOrd::partial_cmp(&x, &y)
+        }
+
+        #[inline]
+        fn inequal(&ord: &Option<Ordering>) -> bool {
+            ord != Some(Ordering::Equal)
+        }
+
+        let other = other.into_par_iter();
+        let ord_len = self.len().cmp(&other.len());
+        self.zip(other)
+            .map(ordering)
+            .find_first(inequal)
+            .unwrap_or(Some(ord_len))
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are equal to those of another
+    fn eq<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialEq<I::Item>,
+    {
+        #[inline]
+        fn eq<T: PartialEq<U>, U>((x, y): (T, U)) -> bool {
+            PartialEq::eq(&x, &y)
+        }
+
+        let other = other.into_par_iter();
+        self.len() == other.len() && self.zip(other).all(eq)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are unequal to those of another
+    fn ne<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialEq<I::Item>,
+    {
+        !self.eq(other)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically less than those of another.
+    fn lt<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        self.partial_cmp(other) == Some(Ordering::Less)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are less or equal to those of another.
+    fn le<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        let ord = self.partial_cmp(other);
+        ord == Some(Ordering::Equal) || ord == Some(Ordering::Less)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are lexicographically greater than those of another.
+    fn gt<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        self.partial_cmp(other) == Some(Ordering::Greater)
+    }
+
+    /// Determines if the elements of this `ParallelIterator`
+    /// are less or equal to those of another.
+    fn ge<I>(self, other: I) -> bool
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        Self::Item: PartialOrd<I::Item>,
+    {
+        let ord = self.partial_cmp(other);
+        ord == Some(Ordering::Equal) || ord == Some(Ordering::Greater)
+    }
+
+    /// Yields an index along with each item.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let chars = vec!['a', 'b', 'c'];
+    /// let result: Vec<_> = chars
+    ///     .into_par_iter()
+    ///     .enumerate()
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [(0, 'a'), (1, 'b'), (2, 'c')]);
+    /// ```
+    fn enumerate(self) -> Enumerate<Self> {
+        Enumerate::new(self)
+    }
+
+    /// Creates an iterator that steps by the given amount
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///use rayon::prelude::*;
+    ///
+    /// let range = (3..10);
+    /// let result: Vec<i32> = range
+    ///    .into_par_iter()
+    ///    .step_by(3)
+    ///    .collect();
+    ///
+    /// assert_eq!(result, [3, 6, 9])
+    /// ```
+    ///
+    /// # Compatibility
+    ///
+    /// This method is only available on Rust 1.38 or greater.
+    #[cfg(step_by)]
+    fn step_by(self, step: usize) -> StepBy<Self> {
+        StepBy::new(self, step)
+    }
+
+    /// Creates an iterator that skips the first `n` elements.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..100)
+    ///     .into_par_iter()
+    ///     .skip(95)
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [95, 96, 97, 98, 99]);
+    /// ```
+    fn skip(self, n: usize) -> Skip<Self> {
+        Skip::new(self, n)
+    }
+
+    /// Creates an iterator that yields the first `n` elements.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..100)
+    ///     .into_par_iter()
+    ///     .take(5)
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [0, 1, 2, 3, 4]);
+    /// ```
+    fn take(self, n: usize) -> Take<Self> {
+        Take::new(self, n)
+    }
+
+    /// Searches for **some** item in the parallel iterator that
+    /// matches the given predicate, and returns its index.  Like
+    /// `ParallelIterator::find_any`, the parallel search will not
+    /// necessarily find the **first** match, and once a match is
+    /// found we'll attempt to stop processing any more.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// let i = a.par_iter().position_any(|&x| x == 3).expect("found");
+    /// assert!(i == 2 || i == 3);
+    ///
+    /// assert_eq!(a.par_iter().position_any(|&x| x == 100), None);
+    /// ```
+    fn position_any<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_any(check)?;
+        Some(i)
+    }
+
+    /// Searches for the sequentially **first** item in the parallel iterator
+    /// that matches the given predicate, and returns its index.
+    ///
+    /// Like `ParallelIterator::find_first`, once a match is found,
+    /// all attempts to the right of the match will be stopped, while
+    /// attempts to the left must continue in case an earlier match
+    /// is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "first" may be nebulous.  If you
+    /// just want the first match that discovered anywhere in the iterator,
+    /// `position_any` is a better choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().position_first(|&x| x == 3), Some(2));
+    ///
+    /// assert_eq!(a.par_iter().position_first(|&x| x == 100), None);
+    /// ```
+    fn position_first<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_first(check)?;
+        Some(i)
+    }
+
+    /// Searches for the sequentially **last** item in the parallel iterator
+    /// that matches the given predicate, and returns its index.
+    ///
+    /// Like `ParallelIterator::find_last`, once a match is found,
+    /// all attempts to the left of the match will be stopped, while
+    /// attempts to the right must continue in case a later match
+    /// is found.
+    ///
+    /// Note that not all parallel iterators have a useful order, much like
+    /// sequential `HashMap` iteration, so "last" may be nebulous.  When the
+    /// order doesn't actually matter to you, `position_any` is a better
+    /// choice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let a = [1, 2, 3, 3];
+    ///
+    /// assert_eq!(a.par_iter().position_last(|&x| x == 3), Some(3));
+    ///
+    /// assert_eq!(a.par_iter().position_last(|&x| x == 100), None);
+    /// ```
+    fn position_last<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        #[inline]
+        fn check(&(_, p): &(usize, bool)) -> bool {
+            p
+        }
+
+        let (i, _) = self.map(predicate).enumerate().find_last(check)?;
+        Some(i)
+    }
+
+    #[doc(hidden)]
+    #[deprecated(
+        note = "parallel `position` does not search in order -- use `position_any`, \\
+                `position_first`, or `position_last`"
+    )]
+    fn position<P>(self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        self.position_any(predicate)
+    }
+
+    /// Searches for items in the parallel iterator that match the given
+    /// predicate, and returns their indices.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let primes = vec![2, 3, 5, 7, 11, 13, 17, 19, 23, 29];
+    ///
+    /// // Find the positions of primes congruent to 1 modulo 6
+    /// let p1mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 1).collect();
+    /// assert_eq!(p1mod6, [3, 5, 7]); // primes 7, 13, and 19
+    ///
+    /// // Find the positions of primes congruent to 5 modulo 6
+    /// let p5mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 5).collect();
+    /// assert_eq!(p5mod6, [2, 4, 6, 8, 9]); // primes 5, 11, 17, 23, and 29
+    /// ```
+    fn positions<P>(self, predicate: P) -> Positions<Self, P>
+    where
+        P: Fn(Self::Item) -> bool + Sync + Send,
+    {
+        Positions::new(self, predicate)
+    }
+
+    /// Produces a new iterator with the elements of this iterator in
+    /// reverse order.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let result: Vec<_> = (0..5)
+    ///     .into_par_iter()
+    ///     .rev()
+    ///     .collect();
+    ///
+    /// assert_eq!(result, [4, 3, 2, 1, 0]);
+    /// ```
+    fn rev(self) -> Rev<Self> {
+        Rev::new(self)
+    }
+
+    /// Sets the minimum length of iterators desired to process in each
+    /// thread.  Rayon will not split any smaller than this length, but
+    /// of course an iterator could already be smaller to begin with.
+    ///
+    /// Producers like `zip` and `interleave` will use greater of the two
+    /// minimums.
+    /// Chained iterators and iterators inside `flat_map` may each use
+    /// their own minimum length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let min = (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .with_min_len(1234)
+    ///     .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment
+    ///     .min().unwrap();
+    ///
+    /// assert!(min >= 1234);
+    /// ```
+    fn with_min_len(self, min: usize) -> MinLen<Self> {
+        MinLen::new(self, min)
+    }
+
+    /// Sets the maximum length of iterators desired to process in each
+    /// thread.  Rayon will try to split at least below this length,
+    /// unless that would put it below the length from `with_min_len()`.
+    /// For example, given min=10 and max=15, a length of 16 will not be
+    /// split any further.
+    ///
+    /// Producers like `zip` and `interleave` will use lesser of the two
+    /// maximums.
+    /// Chained iterators and iterators inside `flat_map` may each use
+    /// their own maximum length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let max = (0..1_000_000)
+    ///     .into_par_iter()
+    ///     .with_max_len(1234)
+    ///     .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment
+    ///     .max().unwrap();
+    ///
+    /// assert!(max <= 1234);
+    /// ```
+    fn with_max_len(self, max: usize) -> MaxLen<Self> {
+        MaxLen::new(self, max)
+    }
+
+    /// Produces an exact count of how many items this iterator will
+    /// produce, presuming no panic occurs.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let par_iter = (0..100).into_par_iter().zip(vec![0; 10]);
+    /// assert_eq!(par_iter.len(), 10);
+    ///
+    /// let vec: Vec<_> = par_iter.collect();
+    /// assert_eq!(vec.len(), 10);
+    /// ```
+    fn len(&self) -> usize;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method causes the iterator `self` to start producing
+    /// items and to feed them to the consumer `consumer` one by one.
+    /// It may split the consumer before doing so to create the
+    /// opportunity to produce in parallel. If a split does happen, it
+    /// will inform the consumer of the index where the split should
+    /// occur (unlike `ParallelIterator::drive_unindexed()`).
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result;
+
+    /// Internal method used to define the behavior of this parallel
+    /// iterator. You should not need to call this directly.
+    ///
+    /// This method converts the iterator into a producer P and then
+    /// invokes `callback.callback()` with P. Note that the type of
+    /// this producer is not defined as part of the API, since
+    /// `callback` must be defined generically for all producers. This
+    /// allows the producer type to contain references; it also means
+    /// that parallel iterators can adjust that type without causing a
+    /// breaking change.
+    ///
+    /// See the [README] for more details on the internals of parallel
+    /// iterators.
+    ///
+    /// [README]: README.md
+    fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output;
+}
+
+/// `FromParallelIterator` implements the creation of a collection
+/// from a [`ParallelIterator`]. By implementing
+/// `FromParallelIterator` for a given type, you define how it will be
+/// created from an iterator.
+///
+/// `FromParallelIterator` is used through [`ParallelIterator`]'s [`collect()`] method.
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+/// [`collect()`]: trait.ParallelIterator.html#method.collect
+///
+/// # Examples
+///
+/// Implementing `FromParallelIterator` for your type:
+///
+/// ```
+/// use rayon::prelude::*;
+/// use std::mem;
+///
+/// struct BlackHole {
+///     mass: usize,
+/// }
+///
+/// impl<T: Send> FromParallelIterator<T> for BlackHole {
+///     fn from_par_iter<I>(par_iter: I) -> Self
+///         where I: IntoParallelIterator<Item = T>
+///     {
+///         let par_iter = par_iter.into_par_iter();
+///         BlackHole {
+///             mass: par_iter.count() * mem::size_of::<T>(),
+///         }
+///     }
+/// }
+///
+/// let bh: BlackHole = (0i32..1000).into_par_iter().collect();
+/// assert_eq!(bh.mass, 4000);
+/// ```
+pub trait FromParallelIterator<T>
+where
+    T: Send,
+{
+    /// Creates an instance of the collection from the parallel iterator `par_iter`.
+    ///
+    /// If your collection is not naturally parallel, the easiest (and
+    /// fastest) way to do this is often to collect `par_iter` into a
+    /// [`LinkedList`] or other intermediate data structure and then
+    /// sequentially extend your collection. However, a more 'native'
+    /// technique is to use the [`par_iter.fold`] or
+    /// [`par_iter.fold_with`] methods to create the collection.
+    /// Alternatively, if your collection is 'natively' parallel, you
+    /// can use `par_iter.for_each` to process each element in turn.
+    ///
+    /// [`LinkedList`]: https://doc.rust-lang.org/std/collections/struct.LinkedList.html
+    /// [`par_iter.fold`]: trait.ParallelIterator.html#method.fold
+    /// [`par_iter.fold_with`]: trait.ParallelIterator.html#method.fold_with
+    /// [`par_iter.for_each`]: trait.ParallelIterator.html#method.for_each
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = T>;
+}
+
+/// `ParallelExtend` extends an existing collection with items from a [`ParallelIterator`].
+///
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+///
+/// # Examples
+///
+/// Implementing `ParallelExtend` for your type:
+///
+/// ```
+/// use rayon::prelude::*;
+/// use std::mem;
+///
+/// struct BlackHole {
+///     mass: usize,
+/// }
+///
+/// impl<T: Send> ParallelExtend<T> for BlackHole {
+///     fn par_extend<I>(&mut self, par_iter: I)
+///         where I: IntoParallelIterator<Item = T>
+///     {
+///         let par_iter = par_iter.into_par_iter();
+///         self.mass += par_iter.count() * mem::size_of::<T>();
+///     }
+/// }
+///
+/// let mut bh = BlackHole { mass: 0 };
+/// bh.par_extend(0i32..1000);
+/// assert_eq!(bh.mass, 4000);
+/// bh.par_extend(0i64..10);
+/// assert_eq!(bh.mass, 4080);
+/// ```
+pub trait ParallelExtend<T>
+where
+    T: Send,
+{
+    /// Extends an instance of the collection with the elements drawn
+    /// from the parallel iterator `par_iter`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut vec = vec![];
+    /// vec.par_extend(0..5);
+    /// vec.par_extend((0..5).into_par_iter().map(|i| i * i));
+    /// assert_eq!(vec, [0, 1, 2, 3, 4, 0, 1, 4, 9, 16]);
+    /// ```
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>;
+}
+
+/// `ParallelDrainFull` creates a parallel iterator that moves all items
+/// from a collection while retaining the original capacity.
+///
+/// Types which are indexable typically implement [`ParallelDrainRange`]
+/// instead, where you can drain fully with `par_drain(..)`.
+///
+/// [`ParallelDrainRange`]: trait.ParallelDrainRange.html
+pub trait ParallelDrainFull {
+    /// The draining parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This is usually the same as `IntoParallelIterator::Item`.
+    type Item: Send;
+
+    /// Returns a draining parallel iterator over an entire collection.
+    ///
+    /// When the iterator is dropped, all items are removed, even if the
+    /// iterator was not fully consumed. If the iterator is leaked, for example
+    /// using `std::mem::forget`, it is unspecified how many items are removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// use std::collections::{BinaryHeap, HashSet};
+    ///
+    /// let squares: HashSet<i32> = (0..10).map(|x| x * x).collect();
+    ///
+    /// let mut heap: BinaryHeap<_> = squares.iter().copied().collect();
+    /// assert_eq!(
+    ///     // heaps are drained in arbitrary order
+    ///     heap.par_drain()
+    ///         .inspect(|x| assert!(squares.contains(x)))
+    ///         .count(),
+    ///     squares.len(),
+    /// );
+    /// assert!(heap.is_empty());
+    /// assert!(heap.capacity() >= squares.len());
+    /// ```
+    fn par_drain(self) -> Self::Iter;
+}
+
+/// `ParallelDrainRange` creates a parallel iterator that moves a range of items
+/// from a collection while retaining the original capacity.
+///
+/// Types which are not indexable may implement [`ParallelDrainFull`] instead.
+///
+/// [`ParallelDrainFull`]: trait.ParallelDrainFull.html
+pub trait ParallelDrainRange<Idx = usize> {
+    /// The draining parallel iterator type that will be created.
+    type Iter: ParallelIterator<Item = Self::Item>;
+
+    /// The type of item that the parallel iterator will produce.
+    /// This is usually the same as `IntoParallelIterator::Item`.
+    type Item: Send;
+
+    /// Returns a draining parallel iterator over a range of the collection.
+    ///
+    /// When the iterator is dropped, all items in the range are removed, even
+    /// if the iterator was not fully consumed. If the iterator is leaked, for
+    /// example using `std::mem::forget`, it is unspecified how many items are
+    /// removed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let squares: Vec<i32> = (0..10).map(|x| x * x).collect();
+    ///
+    /// println!("RangeFull");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..)
+    ///            .eq(squares.par_iter().copied()));
+    /// assert!(vec.is_empty());
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeFrom");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(5..)
+    ///            .eq(squares[5..].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[..5]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeTo");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..5)
+    ///            .eq(squares[..5].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[5..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeToInclusive");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(..=5)
+    ///            .eq(squares[..=5].par_iter().copied()));
+    /// assert_eq!(&vec[..], &squares[6..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("Range");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(3..7)
+    ///            .eq(squares[3..7].par_iter().copied()));
+    /// assert_eq!(&vec[..3], &squares[..3]);
+    /// assert_eq!(&vec[3..], &squares[7..]);
+    /// assert!(vec.capacity() >= squares.len());
+    ///
+    /// println!("RangeInclusive");
+    /// let mut vec = squares.clone();
+    /// assert!(vec.par_drain(3..=7)
+    ///            .eq(squares[3..=7].par_iter().copied()));
+    /// assert_eq!(&vec[..3], &squares[..3]);
+    /// assert_eq!(&vec[3..], &squares[8..]);
+    /// assert!(vec.capacity() >= squares.len());
+    /// ```
+    fn par_drain<R: RangeBounds<Idx>>(self, range: R) -> Self::Iter;
+}
+
+/// We hide the `Try` trait in a private module, as it's only meant to be a
+/// stable clone of the standard library's `Try` trait, as yet unstable.
+mod private {
+    /// Clone of `std::ops::Try`.
+    ///
+    /// Implementing this trait is not permitted outside of `rayon`.
+    pub trait Try {
+        private_decl! {}
+
+        type Ok;
+        type Error;
+        fn into_result(self) -> Result<Self::Ok, Self::Error>;
+        fn from_ok(v: Self::Ok) -> Self;
+        fn from_error(v: Self::Error) -> Self;
+    }
+
+    impl<T> Try for Option<T> {
+        private_impl! {}
+
+        type Ok = T;
+        type Error = ();
+
+        fn into_result(self) -> Result<T, ()> {
+            self.ok_or(())
+        }
+        fn from_ok(v: T) -> Self {
+            Some(v)
+        }
+        fn from_error(_: ()) -> Self {
+            None
+        }
+    }
+
+    impl<T, E> Try for Result<T, E> {
+        private_impl! {}
+
+        type Ok = T;
+        type Error = E;
+
+        fn into_result(self) -> Result<T, E> {
+            self
+        }
+        fn from_ok(v: T) -> Self {
+            Ok(v)
+        }
+        fn from_error(v: E) -> Self {
+            Err(v)
+        }
+    }
+}
diff --git a/src/iter/multizip.rs b/src/iter/multizip.rs
new file mode 100644
index 0000000..8e36d08
--- /dev/null
+++ b/src/iter/multizip.rs
@@ -0,0 +1,338 @@
+use super::plumbing::*;
+use super::*;
+
+/// `MultiZip` is an iterator that zips up a tuple of parallel iterators to
+/// produce tuples of their items.
+///
+/// It is created by calling `into_par_iter()` on a tuple of types that
+/// implement `IntoParallelIterator`, or `par_iter()`/`par_iter_mut()` with
+/// types that are iterable by reference.
+///
+/// The implementation currently support tuples up to length 12.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// // This will iterate `r` by mutable reference, like `par_iter_mut()`, while
+/// // ranges are all iterated by value like `into_par_iter()`.
+/// // Note that the zipped iterator is only as long as the shortest input.
+/// let mut r = vec![0; 3];
+/// (&mut r, 1..10, 10..100, 100..1000).into_par_iter()
+///     .for_each(|(r, x, y, z)| *r = x * y + z);
+///
+/// assert_eq!(&r, &[1 * 10 + 100, 2 * 11 + 101, 3 * 12 + 102]);
+/// ```
+///
+/// For a group that should all be iterated by reference, you can use a tuple reference.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let xs: Vec<_> = (1..10).collect();
+/// let ys: Vec<_> = (10..100).collect();
+/// let zs: Vec<_> = (100..1000).collect();
+///
+/// // Reference each input separately with `IntoParallelIterator`:
+/// let r1: Vec<_> = (&xs, &ys, &zs).into_par_iter()
+///     .map(|(x, y, z)| x * y + z)
+///     .collect();
+///
+/// // Reference them all together with `IntoParallelRefIterator`:
+/// let r2: Vec<_> = (xs, ys, zs).par_iter()
+///     .map(|(x, y, z)| x * y + z)
+///     .collect();
+///
+/// assert_eq!(r1, r2);
+/// ```
+///
+/// Mutable references to a tuple will work similarly.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let mut xs: Vec<_> = (1..4).collect();
+/// let mut ys: Vec<_> = (-4..-1).collect();
+/// let mut zs = vec![0; 3];
+///
+/// // Mutably reference each input separately with `IntoParallelIterator`:
+/// (&mut xs, &mut ys, &mut zs).into_par_iter().for_each(|(x, y, z)| {
+///     *z += *x + *y;
+///     std::mem::swap(x, y);
+/// });
+///
+/// assert_eq!(xs, (vec![-4, -3, -2]));
+/// assert_eq!(ys, (vec![1, 2, 3]));
+/// assert_eq!(zs, (vec![-3, -1, 1]));
+///
+/// // Mutably reference them all together with `IntoParallelRefMutIterator`:
+/// let mut tuple = (xs, ys, zs);
+/// tuple.par_iter_mut().for_each(|(x, y, z)| {
+///     *z += *x + *y;
+///     std::mem::swap(x, y);
+/// });
+///
+/// assert_eq!(tuple, (vec![1, 2, 3], vec![-4, -3, -2], vec![-6, -2, 2]));
+/// ```
+#[derive(Debug, Clone)]
+pub struct MultiZip<T> {
+    tuple: T,
+}
+
+// These macros greedily consume 4 or 2 items first to achieve log2 nesting depth.
+// For example, 5 => 4,1 => (2,2),1.
+//
+// The tuples go up to 12, so we might want to greedily consume 8 too, but
+// the depth works out the same if we let that expand on the right:
+//      9 => 4,5 => (2,2),(4,1) => (2,2),((2,2),1)
+//     12 => 4,8 => (2,2),(4,4) => (2,2),((2,2),(2,2))
+//
+// But if we ever increase to 13, we would want to split 8,5 rather than 4,9.
+
+macro_rules! reduce {
+    ($a:expr, $b:expr, $c:expr, $d:expr, $( $x:expr ),+ => $fn:path) => {
+        reduce!(reduce!($a, $b, $c, $d => $fn),
+                reduce!($( $x ),+ => $fn)
+                => $fn)
+    };
+    ($a:expr, $b:expr, $( $x:expr ),+ => $fn:path) => {
+        reduce!(reduce!($a, $b => $fn),
+                reduce!($( $x ),+ => $fn)
+                => $fn)
+    };
+    ($a:expr, $b:expr => $fn:path) => { $fn($a, $b) };
+    ($a:expr => $fn:path) => { $a };
+}
+
+macro_rules! nest {
+    ($A:tt, $B:tt, $C:tt, $D:tt, $( $X:tt ),+) => {
+        (nest!($A, $B, $C, $D), nest!($( $X ),+))
+    };
+    ($A:tt, $B:tt, $( $X:tt ),+) => {
+        (($A, $B), nest!($( $X ),+))
+    };
+    ($A:tt, $B:tt) => { ($A, $B) };
+    ($A:tt) => { $A };
+}
+
+macro_rules! flatten {
+    ($( $T:ident ),+) => {{
+        #[allow(non_snake_case)]
+        fn flatten<$( $T ),+>(nest!($( $T ),+) : nest!($( $T ),+)) -> ($( $T, )+) {
+            ($( $T, )+)
+        }
+        flatten
+    }};
+}
+
+macro_rules! multizip_impls {
+    ($(
+        $Tuple:ident {
+            $(($idx:tt) -> $T:ident)+
+        }
+    )+) => {
+        $(
+            impl<$( $T, )+> IntoParallelIterator for ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelIterator,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.into_par_iter(), )+ ),
+                    }
+                }
+            }
+
+            impl<'a, $( $T, )+> IntoParallelIterator for &'a ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelRefIterator<'a>,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.par_iter(), )+ ),
+                    }
+                }
+            }
+
+            impl<'a, $( $T, )+> IntoParallelIterator for &'a mut ($( $T, )+)
+            where
+                $(
+                    $T: IntoParallelRefMutIterator<'a>,
+                    $T::Iter: IndexedParallelIterator,
+                )+
+            {
+                type Item = ($( $T::Item, )+);
+                type Iter = MultiZip<($( $T::Iter, )+)>;
+
+                fn into_par_iter(self) -> Self::Iter {
+                    MultiZip {
+                        tuple: ( $( self.$idx.par_iter_mut(), )+ ),
+                    }
+                }
+            }
+
+            impl<$( $T, )+> ParallelIterator for MultiZip<($( $T, )+)>
+            where
+                $( $T: IndexedParallelIterator, )+
+            {
+                type Item = ($( $T::Item, )+);
+
+                fn drive_unindexed<CONSUMER>(self, consumer: CONSUMER) -> CONSUMER::Result
+                where
+                    CONSUMER: UnindexedConsumer<Self::Item>,
+                {
+                    self.drive(consumer)
+                }
+
+                fn opt_len(&self) -> Option<usize> {
+                    Some(self.len())
+                }
+            }
+
+            impl<$( $T, )+> IndexedParallelIterator for MultiZip<($( $T, )+)>
+            where
+                $( $T: IndexedParallelIterator, )+
+            {
+                fn drive<CONSUMER>(self, consumer: CONSUMER) -> CONSUMER::Result
+                where
+                    CONSUMER: Consumer<Self::Item>,
+                {
+                    reduce!($( self.tuple.$idx ),+ => IndexedParallelIterator::zip)
+                        .map(flatten!($( $T ),+))
+                        .drive(consumer)
+                }
+
+                fn len(&self) -> usize {
+                    reduce!($( self.tuple.$idx.len() ),+ => Ord::min)
+                }
+
+                fn with_producer<CB>(self, callback: CB) -> CB::Output
+                where
+                    CB: ProducerCallback<Self::Item>,
+                {
+                    reduce!($( self.tuple.$idx ),+ => IndexedParallelIterator::zip)
+                        .map(flatten!($( $T ),+))
+                        .with_producer(callback)
+                }
+            }
+        )+
+    }
+}
+
+multizip_impls! {
+    Tuple1 {
+        (0) -> A
+    }
+    Tuple2 {
+        (0) -> A
+        (1) -> B
+    }
+    Tuple3 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+    }
+    Tuple4 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+    }
+    Tuple5 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+    }
+    Tuple6 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+    }
+    Tuple7 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+    }
+    Tuple8 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+    }
+    Tuple9 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+    }
+    Tuple10 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+    }
+    Tuple11 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+        (10) -> K
+    }
+    Tuple12 {
+        (0) -> A
+        (1) -> B
+        (2) -> C
+        (3) -> D
+        (4) -> E
+        (5) -> F
+        (6) -> G
+        (7) -> H
+        (8) -> I
+        (9) -> J
+        (10) -> K
+        (11) -> L
+    }
+}
diff --git a/src/iter/noop.rs b/src/iter/noop.rs
new file mode 100644
index 0000000..1e55ecb
--- /dev/null
+++ b/src/iter/noop.rs
@@ -0,0 +1,59 @@
+use super::plumbing::*;
+
+pub(super) struct NoopConsumer;
+
+impl<T> Consumer<T> for NoopConsumer {
+    type Folder = NoopConsumer;
+    type Reducer = NoopReducer;
+    type Result = ();
+
+    fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) {
+        (NoopConsumer, NoopConsumer, NoopReducer)
+    }
+
+    fn into_folder(self) -> Self {
+        self
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T> Folder<T> for NoopConsumer {
+    type Result = ();
+
+    fn consume(self, _item: T) -> Self {
+        self
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        iter.into_iter().for_each(drop);
+        self
+    }
+
+    fn complete(self) {}
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<T> UnindexedConsumer<T> for NoopConsumer {
+    fn split_off_left(&self) -> Self {
+        NoopConsumer
+    }
+
+    fn to_reducer(&self) -> NoopReducer {
+        NoopReducer
+    }
+}
+
+pub(super) struct NoopReducer;
+
+impl Reducer<()> for NoopReducer {
+    fn reduce(self, _left: (), _right: ()) {}
+}
diff --git a/src/iter/once.rs b/src/iter/once.rs
new file mode 100644
index 0000000..5140b6b
--- /dev/null
+++ b/src/iter/once.rs
@@ -0,0 +1,68 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+
+/// Creates a parallel iterator that produces an element exactly once.
+///
+/// This admits no parallelism on its own, but it could be chained to existing
+/// parallel iterators to extend their contents, or otherwise used for any code
+/// that deals with generic parallel iterators.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::once;
+///
+/// let pi = (0..1234).into_par_iter()
+///     .chain(once(-1))
+///     .chain(1234..10_000);
+///
+/// assert_eq!(pi.clone().count(), 10_001);
+/// assert_eq!(pi.clone().filter(|&x| x < 0).count(), 1);
+/// assert_eq!(pi.position_any(|x| x < 0), Some(1234));
+/// ```
+pub fn once<T: Send>(item: T) -> Once<T> {
+    Once { item }
+}
+
+/// Iterator adaptor for [the `once()` function](fn.once.html).
+#[derive(Clone, Debug)]
+pub struct Once<T: Send> {
+    item: T,
+}
+
+impl<T: Send> ParallelIterator for Once<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(1)
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for Once<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        consumer.into_folder().consume(self.item).complete()
+    }
+
+    fn len(&self) -> usize {
+        1
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // Let `OptionProducer` handle it.
+        Some(self.item).into_par_iter().with_producer(callback)
+    }
+}
diff --git a/src/iter/panic_fuse.rs b/src/iter/panic_fuse.rs
new file mode 100644
index 0000000..7487230
--- /dev/null
+++ b/src/iter/panic_fuse.rs
@@ -0,0 +1,342 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::thread;
+
+/// `PanicFuse` is an adaptor that wraps an iterator with a fuse in case
+/// of panics, to halt all threads as soon as possible.
+///
+/// This struct is created by the [`panic_fuse()`] method on [`ParallelIterator`]
+///
+/// [`panic_fuse()`]: trait.ParallelIterator.html#method.panic_fuse
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct PanicFuse<I: ParallelIterator> {
+    base: I,
+}
+
+/// Helper that sets a bool to `true` if dropped while unwinding.
+#[derive(Clone)]
+struct Fuse<'a>(&'a AtomicBool);
+
+impl<'a> Drop for Fuse<'a> {
+    #[inline]
+    fn drop(&mut self) {
+        if thread::panicking() {
+            self.0.store(true, Ordering::Relaxed);
+        }
+    }
+}
+
+impl<'a> Fuse<'a> {
+    #[inline]
+    fn panicked(&self) -> bool {
+        self.0.load(Ordering::Relaxed)
+    }
+}
+
+impl<I> PanicFuse<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `PanicFuse` iterator.
+    pub(super) fn new(base: I) -> PanicFuse<I> {
+        PanicFuse { base }
+    }
+}
+
+impl<I> ParallelIterator for PanicFuse<I>
+where
+    I: ParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let panicked = AtomicBool::new(false);
+        let consumer1 = PanicFuseConsumer {
+            base: consumer,
+            fuse: Fuse(&panicked),
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I> IndexedParallelIterator for PanicFuse<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let panicked = AtomicBool::new(false);
+        let consumer1 = PanicFuseConsumer {
+            base: consumer,
+            fuse: Fuse(&panicked),
+        };
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback { callback });
+
+        struct Callback<CB> {
+            callback: CB,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let panicked = AtomicBool::new(false);
+                let producer = PanicFuseProducer {
+                    base,
+                    fuse: Fuse(&panicked),
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct PanicFuseProducer<'a, P> {
+    base: P,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, P> Producer for PanicFuseProducer<'a, P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = PanicFuseIter<'a, P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        PanicFuseIter {
+            base: self.base.into_iter(),
+            fuse: self.fuse,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            PanicFuseProducer {
+                base: left,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseProducer {
+                base: right,
+                fuse: self.fuse,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = PanicFuseFolder {
+            base: folder,
+            fuse: self.fuse,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+struct PanicFuseIter<'a, I> {
+    base: I,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, I> Iterator for PanicFuseIter<'a, I>
+where
+    I: Iterator,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.fuse.panicked() {
+            None
+        } else {
+            self.base.next()
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+}
+
+impl<'a, I> DoubleEndedIterator for PanicFuseIter<'a, I>
+where
+    I: DoubleEndedIterator,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.fuse.panicked() {
+            None
+        } else {
+            self.base.next_back()
+        }
+    }
+}
+
+impl<'a, I> ExactSizeIterator for PanicFuseIter<'a, I>
+where
+    I: ExactSizeIterator,
+{
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct PanicFuseConsumer<'a, C> {
+    base: C,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Consumer<T> for PanicFuseConsumer<'a, C>
+where
+    C: Consumer<T>,
+{
+    type Folder = PanicFuseFolder<'a, C::Folder>;
+    type Reducer = PanicFuseReducer<'a, C::Reducer>;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            PanicFuseConsumer {
+                base: left,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseConsumer {
+                base: right,
+                fuse: self.fuse.clone(),
+            },
+            PanicFuseReducer {
+                base: reducer,
+                _fuse: self.fuse,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        PanicFuseFolder {
+            base: self.base.into_folder(),
+            fuse: self.fuse,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.fuse.panicked() || self.base.full()
+    }
+}
+
+impl<'a, T, C> UnindexedConsumer<T> for PanicFuseConsumer<'a, C>
+where
+    C: UnindexedConsumer<T>,
+{
+    fn split_off_left(&self) -> Self {
+        PanicFuseConsumer {
+            base: self.base.split_off_left(),
+            fuse: self.fuse.clone(),
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        PanicFuseReducer {
+            base: self.base.to_reducer(),
+            _fuse: self.fuse.clone(),
+        }
+    }
+}
+
+struct PanicFuseFolder<'a, C> {
+    base: C,
+    fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Folder<T> for PanicFuseFolder<'a, C>
+where
+    C: Folder<T>,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        self.base = self.base.consume(item);
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        fn cool<'a, T>(fuse: &'a Fuse<'_>) -> impl Fn(&T) -> bool + 'a {
+            move |_| !fuse.panicked()
+        }
+
+        self.base = {
+            let fuse = &self.fuse;
+            let iter = iter.into_iter().take_while(cool(fuse));
+            self.base.consume_iter(iter)
+        };
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.fuse.panicked() || self.base.full()
+    }
+}
+
+struct PanicFuseReducer<'a, C> {
+    base: C,
+    _fuse: Fuse<'a>,
+}
+
+impl<'a, T, C> Reducer<T> for PanicFuseReducer<'a, C>
+where
+    C: Reducer<T>,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        self.base.reduce(left, right)
+    }
+}
diff --git a/src/iter/par_bridge.rs b/src/iter/par_bridge.rs
new file mode 100644
index 0000000..4c2b96e
--- /dev/null
+++ b/src/iter/par_bridge.rs
@@ -0,0 +1,201 @@
+use crossbeam_deque::{Steal, Stealer, Worker};
+
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::{Mutex, TryLockError};
+use std::thread::yield_now;
+
+use crate::current_num_threads;
+use crate::iter::plumbing::{bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer};
+use crate::iter::ParallelIterator;
+
+/// Conversion trait to convert an `Iterator` to a `ParallelIterator`.
+///
+/// This creates a "bridge" from a sequential iterator to a parallel one, by distributing its items
+/// across the Rayon thread pool. This has the advantage of being able to parallelize just about
+/// anything, but the resulting `ParallelIterator` can be less efficient than if you started with
+/// `par_iter` instead. However, it can still be useful for iterators that are difficult to
+/// parallelize by other means, like channels or file or network I/O.
+///
+/// The resulting iterator is not guaranteed to keep the order of the original iterator.
+///
+/// # Examples
+///
+/// To use this trait, take an existing `Iterator` and call `par_bridge` on it. After that, you can
+/// use any of the `ParallelIterator` methods:
+///
+/// ```
+/// use rayon::iter::ParallelBridge;
+/// use rayon::prelude::ParallelIterator;
+/// use std::sync::mpsc::channel;
+///
+/// let rx = {
+///     let (tx, rx) = channel();
+///
+///     tx.send("one!");
+///     tx.send("two!");
+///     tx.send("three!");
+///
+///     rx
+/// };
+///
+/// let mut output: Vec<&'static str> = rx.into_iter().par_bridge().collect();
+/// output.sort_unstable();
+///
+/// assert_eq!(&*output, &["one!", "three!", "two!"]);
+/// ```
+pub trait ParallelBridge: Sized {
+    /// Creates a bridge from this type to a `ParallelIterator`.
+    fn par_bridge(self) -> IterBridge<Self>;
+}
+
+impl<T: Iterator + Send> ParallelBridge for T
+where
+    T::Item: Send,
+{
+    fn par_bridge(self) -> IterBridge<Self> {
+        IterBridge { iter: self }
+    }
+}
+
+/// `IterBridge` is a parallel iterator that wraps a sequential iterator.
+///
+/// This type is created when using the `par_bridge` method on `ParallelBridge`. See the
+/// [`ParallelBridge`] documentation for details.
+///
+/// [`ParallelBridge`]: trait.ParallelBridge.html
+#[derive(Debug, Clone)]
+pub struct IterBridge<Iter> {
+    iter: Iter,
+}
+
+impl<Iter: Iterator + Send> ParallelIterator for IterBridge<Iter>
+where
+    Iter::Item: Send,
+{
+    type Item = Iter::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let split_count = AtomicUsize::new(current_num_threads());
+        let worker = Worker::new_fifo();
+        let stealer = worker.stealer();
+        let done = AtomicBool::new(false);
+        let iter = Mutex::new((self.iter, worker));
+
+        bridge_unindexed(
+            IterParallelProducer {
+                split_count: &split_count,
+                done: &done,
+                iter: &iter,
+                items: stealer,
+            },
+            consumer,
+        )
+    }
+}
+
+struct IterParallelProducer<'a, Iter: Iterator> {
+    split_count: &'a AtomicUsize,
+    done: &'a AtomicBool,
+    iter: &'a Mutex<(Iter, Worker<Iter::Item>)>,
+    items: Stealer<Iter::Item>,
+}
+
+// manual clone because T doesn't need to be Clone, but the derive assumes it should be
+impl<'a, Iter: Iterator + 'a> Clone for IterParallelProducer<'a, Iter> {
+    fn clone(&self) -> Self {
+        IterParallelProducer {
+            split_count: self.split_count,
+            done: self.done,
+            iter: self.iter,
+            items: self.items.clone(),
+        }
+    }
+}
+
+impl<'a, Iter: Iterator + Send + 'a> UnindexedProducer for IterParallelProducer<'a, Iter>
+where
+    Iter::Item: Send,
+{
+    type Item = Iter::Item;
+
+    fn split(self) -> (Self, Option<Self>) {
+        let mut count = self.split_count.load(Ordering::SeqCst);
+
+        loop {
+            let done = self.done.load(Ordering::SeqCst);
+            match count.checked_sub(1) {
+                Some(new_count) if !done => {
+                    let last_count =
+                        self.split_count
+                            .compare_and_swap(count, new_count, Ordering::SeqCst);
+                    if last_count == count {
+                        return (self.clone(), Some(self));
+                    } else {
+                        count = last_count;
+                    }
+                }
+                _ => {
+                    return (self, None);
+                }
+            }
+        }
+    }
+
+    fn fold_with<F>(self, mut folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        loop {
+            match self.items.steal() {
+                Steal::Success(it) => {
+                    folder = folder.consume(it);
+                    if folder.full() {
+                        return folder;
+                    }
+                }
+                Steal::Empty => {
+                    if self.done.load(Ordering::SeqCst) {
+                        // the iterator is out of items, no use in continuing
+                        return folder;
+                    } else {
+                        // our cache is out of items, time to load more from the iterator
+                        match self.iter.try_lock() {
+                            Ok(mut guard) => {
+                                let count = current_num_threads();
+                                let count = (count * count) * 2;
+
+                                let (ref mut iter, ref worker) = *guard;
+
+                                // while worker.len() < count {
+                                // FIXME the new deque doesn't let us count items.  We can just
+                                // push a number of items, but that doesn't consider active
+                                // stealers elsewhere.
+                                for _ in 0..count {
+                                    if let Some(it) = iter.next() {
+                                        worker.push(it);
+                                    } else {
+                                        self.done.store(true, Ordering::SeqCst);
+                                        break;
+                                    }
+                                }
+                            }
+                            Err(TryLockError::WouldBlock) => {
+                                // someone else has the mutex, just sit tight until it's ready
+                                yield_now(); //TODO: use a thread=pool-aware yield? (#548)
+                            }
+                            Err(TryLockError::Poisoned(_)) => {
+                                // any panics from other threads will have been caught by the pool,
+                                // and will be re-thrown when joined - just exit
+                                return folder;
+                            }
+                        }
+                    }
+                }
+                Steal::Retry => (),
+            }
+        }
+    }
+}
diff --git a/src/iter/plumbing/README.md b/src/iter/plumbing/README.md
new file mode 100644
index 0000000..cd94eae
--- /dev/null
+++ b/src/iter/plumbing/README.md
@@ -0,0 +1,315 @@
+# Parallel Iterators
+
+These are some notes on the design of the parallel iterator traits.
+This file does not describe how to **use** parallel iterators.
+
+## The challenge
+
+Parallel iterators are more complicated than sequential iterators.
+The reason is that they have to be able to split themselves up and
+operate in parallel across the two halves.
+
+The current design for parallel iterators has two distinct modes in
+which they can be used; as we will see, not all iterators support both
+modes (which is why there are two):
+
+- **Pull mode** (the `Producer` and `UnindexedProducer` traits): in this mode,
+  the iterator is asked to produce the next item using a call to `next`. This
+  is basically like a normal iterator, but with a twist: you can split the
+  iterator in half to produce disjoint items in separate threads.
+  - in the `Producer` trait, splitting is done with `split_at`, which accepts
+    an index where the split should be performed. Only indexed iterators can
+    work in this mode, as they know exactly how much data they will produce,
+    and how to locate the requested index.
+  - in the `UnindexedProducer` trait, splitting is done with `split`, which
+    simply requests that the producer divide itself *approximately* in half.
+    This is useful when the exact length and/or layout is unknown, as with
+    `String` characters, or when the length might exceed `usize`, as with
+    `Range<u64>` on 32-bit platforms.
+    - In theory, any `Producer` could act unindexed, but we don't currently
+      use that possibility.  When you know the exact length, a `split` can
+      simply be implemented as `split_at(length/2)`.
+- **Push mode** (the `Consumer` and `UnindexedConsumer` traits): in
+  this mode, the iterator instead is *given* each item in turn, which
+  is then processed. This is the opposite of a normal iterator. It's
+  more like a `for_each` call: each time a new item is produced, the
+  `consume` method is called with that item. (The traits themselves are
+  a bit more complex, as they support state that can be threaded
+  through and ultimately reduced.) Unlike producers, there are two
+  variants of consumers. The difference is how the split is performed:
+  - in the `Consumer` trait, splitting is done with `split_at`, which
+    accepts an index where the split should be performed. All
+    iterators can work in this mode. The resulting halves thus have an
+    idea about how much data they expect to consume.
+  - in the `UnindexedConsumer` trait, splitting is done with
+    `split_off_left`.  There is no index: the resulting halves must be
+    prepared to process any amount of data, and they don't know where that
+    data falls in the overall stream.
+    - Not all consumers can operate in this mode. It works for
+      `for_each` and `reduce`, for example, but it does not work for
+      `collect_into_vec`, since in that case the position of each item is
+      important for knowing where it ends up in the target collection.
+
+## How iterator execution proceeds
+
+We'll walk through this example iterator chain to start. This chain
+demonstrates more-or-less the full complexity of what can happen.
+
+```rust
+vec1.par_iter()
+    .zip(vec2.par_iter())
+    .flat_map(some_function)
+    .for_each(some_other_function)
+```
+
+To handle an iterator chain, we start by creating consumers. This
+works from the end. So in this case, the call to `for_each` is the
+final step, so it will create a `ForEachConsumer` that, given an item,
+just calls `some_other_function` with that item. (`ForEachConsumer` is
+a very simple consumer because it doesn't need to thread any state
+between items at all.)
+
+Now, the `for_each` call will pass this consumer to the base iterator,
+which is the `flat_map`. It will do this by calling the `drive_unindexed`
+method on the `ParallelIterator` trait. `drive_unindexed` basically
+says "produce items for this iterator and feed them to this consumer";
+it only works for unindexed consumers.
+
+(As an aside, it is interesting that only some consumers can work in
+unindexed mode, but all producers can *drive* an unindexed consumer.
+In contrast, only some producers can drive an *indexed* consumer, but
+all consumers can be supplied indexes. Isn't variance neat.)
+
+As it happens, `FlatMap` only works with unindexed consumers anyway.
+This is because flat-map basically has no idea how many items it will
+produce. If you ask flat-map to produce the 22nd item, it can't do it,
+at least not without some intermediate state. It doesn't know whether
+processing the first item will create 1 item, 3 items, or 100;
+therefore, to produce an arbitrary item, it would basically just have
+to start at the beginning and execute sequentially, which is not what
+we want. But for unindexed consumers, this doesn't matter, since they
+don't need to know how much data they will get.
+
+Therefore, `FlatMap` can wrap the `ForEachConsumer` with a
+`FlatMapConsumer` that feeds to it. This `FlatMapConsumer` will be
+given one item. It will then invoke `some_function` to get a parallel
+iterator out. It will then ask this new parallel iterator to drive the
+`ForEachConsumer`. The `drive_unindexed` method on `flat_map` can then
+pass the `FlatMapConsumer` up the chain to the previous item, which is
+`zip`. At this point, something interesting happens.
+
+## Switching from push to pull mode
+
+If you think about `zip`, it can't really be implemented as a
+consumer, at least not without an intermediate thread and some
+channels or something (or maybe coroutines). The problem is that it
+has to walk two iterators *in lockstep*. Basically, it can't call two
+`drive` methods simultaneously, it can only call one at a time. So at
+this point, the `zip` iterator needs to switch from *push mode* into
+*pull mode*.
+
+You'll note that `Zip` is only usable if its inputs implement
+`IndexedParallelIterator`, meaning that they can produce data starting
+at random points in the stream. This need to switch to push mode is
+exactly why. If we want to split a zip iterator at position 22, we
+need to be able to start zipping items from index 22 right away,
+without having to start from index 0.
+
+Anyway, so at this point, the `drive_unindexed` method for `Zip` stops
+creating consumers. Instead, it creates a *producer*, a `ZipProducer`,
+to be exact, and calls the `bridge` function in the `internals`
+module. Creating a `ZipProducer` will in turn create producers for
+the two iterators being zipped. This is possible because they both
+implement `IndexedParallelIterator`.
+
+The `bridge` function will then connect the consumer, which is
+handling the `flat_map` and `for_each`, with the producer, which is
+handling the `zip` and its preecessors. It will split down until the
+chunks seem reasonably small, then pull items from the producer and
+feed them to the consumer.
+
+## The base case
+
+The other time that `bridge` gets used is when we bottom out in an
+indexed producer, such as a slice or range.  There is also a
+`bridge_unindexed` equivalent for - you guessed it - unindexed producers,
+such as string characters.
+
+<a name="producer-callback">
+
+## What on earth is `ProducerCallback`?
+
+We saw that when you call a parallel action method like
+`par_iter.reduce()`, that will create a "reducing" consumer and then
+invoke `par_iter.drive_unindexed()` (or `par_iter.drive()`) as
+appropriate. This may create yet more consumers as we proceed up the
+parallel iterator chain. But at some point we're going to get to the
+start of the chain, or to a parallel iterator (like `zip()`) that has
+to coordinate multiple inputs. At that point, we need to start
+converting parallel iterators into producers.
+
+The way we do this is by invoking the method `with_producer()`, defined on
+`IndexedParallelIterator`. This is a callback scheme. In an ideal world,
+it would work like this:
+
+```rust
+base_iter.with_producer(|base_producer| {
+    // here, `base_producer` is the producer for `base_iter`
+});
+```
+
+In that case, we could implement a combinator like `map()` by getting
+the producer for the base iterator, wrapping it to make our own
+`MapProducer`, and then passing that to the callback. Something like
+this:
+
+```rust
+struct MapProducer<'f, P, F: 'f> {
+    base: P,
+    map_op: &'f F,
+}
+
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    fn with_producer<CB>(self, callback: CB) -> CB::Output {
+        let map_op = &self.map_op;
+        self.base_iter.with_producer(|base_producer| {
+            // Here `producer` is the producer for `self.base_iter`.
+            // Wrap that to make a `MapProducer`
+            let map_producer = MapProducer {
+                base: base_producer,
+                map_op: map_op
+            };
+
+            // invoke the callback with the wrapped version
+            callback(map_producer)
+        });
+    }
+});
+```
+
+This example demonstrates some of the power of the callback scheme.
+It winds up being a very flexible setup. For one thing, it means we
+can take ownership of `par_iter`; we can then in turn give ownership
+away of its bits and pieces into the producer (this is very useful if
+the iterator owns an `&mut` slice, for example), or create shared
+references and put *those* in the producer. In the case of map, for
+example, the parallel iterator owns the `map_op`, and we borrow
+references to it which we then put into the `MapProducer` (this means
+the `MapProducer` can easily split itself and share those references).
+The `with_producer` method can also create resources that are needed
+during the parallel execution, since the producer does not have to be
+returned.
+
+Unfortunately there is a catch. We can't actually use closures the way
+I showed you. To see why, think about the type that `map_producer`
+would have to have. If we were going to write the `with_producer`
+method using a closure, it would have to look something like this:
+
+```rust
+pub trait IndexedParallelIterator: ParallelIterator {
+    type Producer;
+    fn with_producer<CB, R>(self, callback: CB) -> R
+        where CB: FnOnce(Self::Producer) -> R;
+    ...
+}
+```
+
+Note that we had to add this associated type `Producer` so that
+we could specify the argument of the callback to be `Self::Producer`.
+Now, imagine trying to write that `MapProducer` impl using this style:
+
+```rust
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    type MapProducer = MapProducer<'f, P::Producer, F>;
+    //                             ^^ wait, what is this `'f`?
+
+    fn with_producer<CB, R>(self, callback: CB) -> R
+        where CB: FnOnce(Self::Producer) -> R
+    {
+        let map_op = &self.map_op;
+        //  ^^^^^^ `'f` is (conceptually) the lifetime of this reference,
+        //         so it will be different for each call to `with_producer`!
+    }
+}
+```
+
+This may look familiar to you: it's the same problem that we have
+trying to define an `Iterable` trait. Basically, the producer type
+needs to include a lifetime (here, `'f`) that refers to the body of
+`with_producer` and hence is not in scope at the impl level.
+
+If we had [associated type constructors][1598], we could solve this
+problem that way. But there is another solution. We can use a
+dedicated callback trait like `ProducerCallback`, instead of `FnOnce`:
+
+[1598]: https://github.com/rust-lang/rfcs/pull/1598
+
+```rust
+pub trait ProducerCallback<T> {
+    type Output;
+    fn callback<P>(self, producer: P) -> Self::Output
+        where P: Producer<Item=T>;
+}
+```
+
+Using this trait, the signature of `with_producer()` looks like this:
+
+```rust
+fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output;
+```
+
+Notice that this signature **never has to name the producer type** --
+there is no associated type `Producer` anymore. This is because the
+`callback()` method is generically over **all** producers `P`.
+
+The problem is that now the `||` sugar doesn't work anymore. So we
+have to manually create the callback struct, which is a mite tedious.
+So our `MapProducer` code looks like this:
+
+```rust
+impl<I, F> IndexedParallelIterator for Map<I, F>
+    where I: IndexedParallelIterator,
+          F: MapOp<I::Item>,
+{
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+        where CB: ProducerCallback<Self::Item>
+    {
+        return self.base.with_producer(Callback { callback: callback, map_op: self.map_op });
+        //                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+        //                             Manual version of the closure sugar: create an instance
+        //                             of a struct that implements `ProducerCallback`.
+
+        // The struct declaration. Each field is something that need to capture from the
+        // creating scope.
+        struct Callback<CB, F> {
+            callback: CB,
+            map_op: F,
+        }
+
+        // Implement the `ProducerCallback` trait. This is pure boilerplate.
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+            where F: MapOp<T>,
+                  CB: ProducerCallback<F::Output>
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+                where P: Producer<Item=T>
+            {
+                // The body of the closure is here:
+                let producer = MapProducer { base: base,
+                                             map_op: &self.map_op };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+```
+
+OK, a bit tedious, but it works!
diff --git a/src/iter/plumbing/mod.rs b/src/iter/plumbing/mod.rs
new file mode 100644
index 0000000..71d4fb4
--- /dev/null
+++ b/src/iter/plumbing/mod.rs
@@ -0,0 +1,484 @@
+//! Traits and functions used to implement parallel iteration.  These are
+//! low-level details -- users of parallel iterators should not need to
+//! interact with them directly.  See [the `plumbing` README][r] for a general overview.
+//!
+//! [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+
+use crate::join_context;
+
+use super::IndexedParallelIterator;
+
+use std::cmp;
+use std::usize;
+
+/// The `ProducerCallback` trait is a kind of generic closure,
+/// [analogous to `FnOnce`][FnOnce]. See [the corresponding section in
+/// the plumbing README][r] for more details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md#producer-callback
+/// [FnOnce]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html
+pub trait ProducerCallback<T> {
+    /// The type of value returned by this callback. Analogous to
+    /// [`Output` from the `FnOnce` trait][Output].
+    ///
+    /// [Output]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html#associatedtype.Output
+    type Output;
+
+    /// Invokes the callback with the given producer as argument. The
+    /// key point of this trait is that this method is generic over
+    /// `P`, and hence implementors must be defined for any producer.
+    fn callback<P>(self, producer: P) -> Self::Output
+    where
+        P: Producer<Item = T>;
+}
+
+/// A `Producer` is effectively a "splittable `IntoIterator`". That
+/// is, a producer is a value which can be converted into an iterator
+/// at any time: at that point, it simply produces items on demand,
+/// like any iterator. But what makes a `Producer` special is that,
+/// *before* we convert to an iterator, we can also **split** it at a
+/// particular point using the `split_at` method. This will yield up
+/// two producers, one producing the items before that point, and one
+/// producing the items after that point (these two producers can then
+/// independently be split further, or be converted into iterators).
+/// In Rayon, this splitting is used to divide between threads.
+/// See [the `plumbing` README][r] for further details.
+///
+/// Note that each producer will always produce a fixed number of
+/// items N. However, this number N is not queryable through the API;
+/// the consumer is expected to track it.
+///
+/// NB. You might expect `Producer` to extend the `IntoIterator`
+/// trait.  However, [rust-lang/rust#20671][20671] prevents us from
+/// declaring the DoubleEndedIterator and ExactSizeIterator
+/// constraints on a required IntoIterator trait, so we inline
+/// IntoIterator here until that issue is fixed.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+/// [20671]: https://github.com/rust-lang/rust/issues/20671
+pub trait Producer: Send + Sized {
+    /// The type of item that will be produced by this producer once
+    /// it is converted into an iterator.
+    type Item;
+
+    /// The type of iterator we will become.
+    type IntoIter: Iterator<Item = Self::Item> + DoubleEndedIterator + ExactSizeIterator;
+
+    /// Convert `self` into an iterator; at this point, no more parallel splits
+    /// are possible.
+    fn into_iter(self) -> Self::IntoIter;
+
+    /// The minimum number of items that we will process
+    /// sequentially. Defaults to 1, which means that we will split
+    /// all the way down to a single item. This can be raised higher
+    /// using the [`with_min_len`] method, which will force us to
+    /// create sequential tasks at a larger granularity. Note that
+    /// Rayon automatically normally attempts to adjust the size of
+    /// parallel splits to reduce overhead, so this should not be
+    /// needed.
+    ///
+    /// [`with_min_len`]: ../trait.IndexedParallelIterator.html#method.with_min_len
+    fn min_len(&self) -> usize {
+        1
+    }
+
+    /// The maximum number of items that we will process
+    /// sequentially. Defaults to MAX, which means that we can choose
+    /// not to split at all. This can be lowered using the
+    /// [`with_max_len`] method, which will force us to create more
+    /// parallel tasks. Note that Rayon automatically normally
+    /// attempts to adjust the size of parallel splits to reduce
+    /// overhead, so this should not be needed.
+    ///
+    /// [`with_max_len`]: ../trait.IndexedParallelIterator.html#method.with_max_len
+    fn max_len(&self) -> usize {
+        usize::MAX
+    }
+
+    /// Split into two producers; one produces items `0..index`, the
+    /// other `index..N`. Index must be less than or equal to `N`.
+    fn split_at(self, index: usize) -> (Self, Self);
+
+    /// Iterate the producer, feeding each element to `folder`, and
+    /// stop when the folder is full (or all elements have been consumed).
+    ///
+    /// The provided implementation is sufficient for most iterables.
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.into_iter())
+    }
+}
+
+/// A consumer is effectively a [generalized "fold" operation][fold],
+/// and in fact each consumer will eventually be converted into a
+/// [`Folder`]. What makes a consumer special is that, like a
+/// [`Producer`], it can be **split** into multiple consumers using
+/// the `split_at` method. When a consumer is split, it produces two
+/// consumers, as well as a **reducer**. The two consumers can be fed
+/// items independently, and when they are done the reducer is used to
+/// combine their two results into one. See [the `plumbing`
+/// README][r] for further details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold
+/// [`Folder`]: trait.Folder.html
+/// [`Producer`]: trait.Producer.html
+pub trait Consumer<Item>: Send + Sized {
+    /// The type of folder that this consumer can be converted into.
+    type Folder: Folder<Item, Result = Self::Result>;
+
+    /// The type of reducer that is produced if this consumer is split.
+    type Reducer: Reducer<Self::Result>;
+
+    /// The type of result that this consumer will ultimately produce.
+    type Result: Send;
+
+    /// Divide the consumer into two consumers, one processing items
+    /// `0..index` and one processing items from `index..`. Also
+    /// produces a reducer that can be used to reduce the results at
+    /// the end.
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer);
+
+    /// Convert the consumer into a folder that can consume items
+    /// sequentially, eventually producing a final result.
+    fn into_folder(self) -> Self::Folder;
+
+    /// Hint whether this `Consumer` would like to stop processing
+    /// further items, e.g. if a search has been completed.
+    fn full(&self) -> bool;
+}
+
+/// The `Folder` trait encapsulates [the standard fold
+/// operation][fold].  It can be fed many items using the `consume`
+/// method. At the end, once all items have been consumed, it can then
+/// be converted (using `complete`) into a final value.
+///
+/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold
+pub trait Folder<Item>: Sized {
+    /// The type of result that will ultimately be produced by the folder.
+    type Result;
+
+    /// Consume next item and return new sequential state.
+    fn consume(self, item: Item) -> Self;
+
+    /// Consume items from the iterator until full, and return new sequential state.
+    ///
+    /// This method is **optional**. The default simply iterates over
+    /// `iter`, invoking `consume` and checking after each iteration
+    /// whether `full` returns false.
+    ///
+    /// The main reason to override it is if you can provide a more
+    /// specialized, efficient implementation.
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = Item>,
+    {
+        for item in iter {
+            self = self.consume(item);
+            if self.full() {
+                break;
+            }
+        }
+        self
+    }
+
+    /// Finish consuming items, produce final result.
+    fn complete(self) -> Self::Result;
+
+    /// Hint whether this `Folder` would like to stop processing
+    /// further items, e.g. if a search has been completed.
+    fn full(&self) -> bool;
+}
+
+/// The reducer is the final step of a `Consumer` -- after a consumer
+/// has been split into two parts, and each of those parts has been
+/// fully processed, we are left with two results. The reducer is then
+/// used to combine those two results into one. See [the `plumbing`
+/// README][r] for further details.
+///
+/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md
+pub trait Reducer<Result> {
+    /// Reduce two final results into one; this is executed after a
+    /// split.
+    fn reduce(self, left: Result, right: Result) -> Result;
+}
+
+/// A stateless consumer can be freely copied. These consumers can be
+/// used like regular consumers, but they also support a
+/// `split_off_left` method that does not take an index to split, but
+/// simply splits at some arbitrary point (`for_each`, for example,
+/// produces an unindexed consumer).
+pub trait UnindexedConsumer<I>: Consumer<I> {
+    /// Splits off a "left" consumer and returns it. The `self`
+    /// consumer should then be used to consume the "right" portion of
+    /// the data. (The ordering matters for methods like find_first --
+    /// values produced by the returned value are given precedence
+    /// over values produced by `self`.) Once the left and right
+    /// halves have been fully consumed, you should reduce the results
+    /// with the result of `to_reducer`.
+    fn split_off_left(&self) -> Self;
+
+    /// Creates a reducer that can be used to combine the results from
+    /// a split consumer.
+    fn to_reducer(&self) -> Self::Reducer;
+}
+
+/// A variant on `Producer` which does not know its exact length or
+/// cannot represent it in a `usize`. These producers act like
+/// ordinary producers except that they cannot be told to split at a
+/// particular point. Instead, you just ask them to split 'somewhere'.
+///
+/// (In principle, `Producer` could extend this trait; however, it
+/// does not because to do so would require producers to carry their
+/// own length with them.)
+pub trait UnindexedProducer: Send + Sized {
+    /// The type of item returned by this producer.
+    type Item;
+
+    /// Split midway into a new producer if possible, otherwise return `None`.
+    fn split(self) -> (Self, Option<Self>);
+
+    /// Iterate the producer, feeding each element to `folder`, and
+    /// stop when the folder is full (or all elements have been consumed).
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>;
+}
+
+/// A splitter controls the policy for splitting into smaller work items.
+///
+/// Thief-splitting is an adaptive policy that starts by splitting into
+/// enough jobs for every worker thread, and then resets itself whenever a
+/// job is actually stolen into a different thread.
+#[derive(Clone, Copy)]
+struct Splitter {
+    /// The `splits` tell us approximately how many remaining times we'd
+    /// like to split this job.  We always just divide it by two though, so
+    /// the effective number of pieces will be `next_power_of_two()`.
+    splits: usize,
+}
+
+impl Splitter {
+    #[inline]
+    fn new() -> Splitter {
+        Splitter {
+            splits: crate::current_num_threads(),
+        }
+    }
+
+    #[inline]
+    fn try_split(&mut self, stolen: bool) -> bool {
+        let Splitter { splits } = *self;
+
+        if stolen {
+            // This job was stolen!  Reset the number of desired splits to the
+            // thread count, if that's more than we had remaining anyway.
+            self.splits = cmp::max(crate::current_num_threads(), self.splits / 2);
+            true
+        } else if splits > 0 {
+            // We have splits remaining, make it so.
+            self.splits /= 2;
+            true
+        } else {
+            // Not stolen, and no more splits -- we're done!
+            false
+        }
+    }
+}
+
+/// The length splitter is built on thief-splitting, but additionally takes
+/// into account the remaining length of the iterator.
+#[derive(Clone, Copy)]
+struct LengthSplitter {
+    inner: Splitter,
+
+    /// The smallest we're willing to divide into.  Usually this is just 1,
+    /// but you can choose a larger working size with `with_min_len()`.
+    min: usize,
+}
+
+impl LengthSplitter {
+    /// Creates a new splitter based on lengths.
+    ///
+    /// The `min` is a hard lower bound.  We'll never split below that, but
+    /// of course an iterator might start out smaller already.
+    ///
+    /// The `max` is an upper bound on the working size, used to determine
+    /// the minimum number of times we need to split to get under that limit.
+    /// The adaptive algorithm may very well split even further, but never
+    /// smaller than the `min`.
+    #[inline]
+    fn new(min: usize, max: usize, len: usize) -> LengthSplitter {
+        let mut splitter = LengthSplitter {
+            inner: Splitter::new(),
+            min: cmp::max(min, 1),
+        };
+
+        // Divide the given length by the max working length to get the minimum
+        // number of splits we need to get under that max.  This rounds down,
+        // but the splitter actually gives `next_power_of_two()` pieces anyway.
+        // e.g. len 12345 / max 100 = 123 min_splits -> 128 pieces.
+        let min_splits = len / cmp::max(max, 1);
+
+        // Only update the value if it's not splitting enough already.
+        if min_splits > splitter.inner.splits {
+            splitter.inner.splits = min_splits;
+        }
+
+        splitter
+    }
+
+    #[inline]
+    fn try_split(&mut self, len: usize, stolen: bool) -> bool {
+        // If splitting wouldn't make us too small, try the inner splitter.
+        len / 2 >= self.min && self.inner.try_split(stolen)
+    }
+}
+
+/// This helper function is used to "connect" a parallel iterator to a
+/// consumer. It will convert the `par_iter` into a producer P and
+/// then pull items from P and feed them to `consumer`, splitting and
+/// creating parallel threads as needed.
+///
+/// This is useful when you are implementing your own parallel
+/// iterators: it is often used as the definition of the
+/// [`drive_unindexed`] or [`drive`] methods.
+///
+/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed
+/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive
+pub fn bridge<I, C>(par_iter: I, consumer: C) -> C::Result
+where
+    I: IndexedParallelIterator,
+    C: Consumer<I::Item>,
+{
+    let len = par_iter.len();
+    return par_iter.with_producer(Callback { len, consumer });
+
+    struct Callback<C> {
+        len: usize,
+        consumer: C,
+    }
+
+    impl<C, I> ProducerCallback<I> for Callback<C>
+    where
+        C: Consumer<I>,
+    {
+        type Output = C::Result;
+        fn callback<P>(self, producer: P) -> C::Result
+        where
+            P: Producer<Item = I>,
+        {
+            bridge_producer_consumer(self.len, producer, self.consumer)
+        }
+    }
+}
+
+/// This helper function is used to "connect" a producer and a
+/// consumer. You may prefer to call [`bridge`], which wraps this
+/// function. This function will draw items from `producer` and feed
+/// them to `consumer`, splitting and creating parallel tasks when
+/// needed.
+///
+/// This is useful when you are implementing your own parallel
+/// iterators: it is often used as the definition of the
+/// [`drive_unindexed`] or [`drive`] methods.
+///
+/// [`bridge`]: fn.bridge.html
+/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed
+/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive
+pub fn bridge_producer_consumer<P, C>(len: usize, producer: P, consumer: C) -> C::Result
+where
+    P: Producer,
+    C: Consumer<P::Item>,
+{
+    let splitter = LengthSplitter::new(producer.min_len(), producer.max_len(), len);
+    return helper(len, false, splitter, producer, consumer);
+
+    fn helper<P, C>(
+        len: usize,
+        migrated: bool,
+        mut splitter: LengthSplitter,
+        producer: P,
+        consumer: C,
+    ) -> C::Result
+    where
+        P: Producer,
+        C: Consumer<P::Item>,
+    {
+        if consumer.full() {
+            consumer.into_folder().complete()
+        } else if splitter.try_split(len, migrated) {
+            let mid = len / 2;
+            let (left_producer, right_producer) = producer.split_at(mid);
+            let (left_consumer, right_consumer, reducer) = consumer.split_at(mid);
+            let (left_result, right_result) = join_context(
+                |context| {
+                    helper(
+                        mid,
+                        context.migrated(),
+                        splitter,
+                        left_producer,
+                        left_consumer,
+                    )
+                },
+                |context| {
+                    helper(
+                        len - mid,
+                        context.migrated(),
+                        splitter,
+                        right_producer,
+                        right_consumer,
+                    )
+                },
+            );
+            reducer.reduce(left_result, right_result)
+        } else {
+            producer.fold_with(consumer.into_folder()).complete()
+        }
+    }
+}
+
+/// A variant of [`bridge_producer_consumer`] where the producer is an unindexed producer.
+///
+/// [`bridge_producer_consumer`]: fn.bridge_producer_consumer.html
+pub fn bridge_unindexed<P, C>(producer: P, consumer: C) -> C::Result
+where
+    P: UnindexedProducer,
+    C: UnindexedConsumer<P::Item>,
+{
+    let splitter = Splitter::new();
+    bridge_unindexed_producer_consumer(false, splitter, producer, consumer)
+}
+
+fn bridge_unindexed_producer_consumer<P, C>(
+    migrated: bool,
+    mut splitter: Splitter,
+    producer: P,
+    consumer: C,
+) -> C::Result
+where
+    P: UnindexedProducer,
+    C: UnindexedConsumer<P::Item>,
+{
+    if consumer.full() {
+        consumer.into_folder().complete()
+    } else if splitter.try_split(migrated) {
+        match producer.split() {
+            (left_producer, Some(right_producer)) => {
+                let (reducer, left_consumer, right_consumer) =
+                    (consumer.to_reducer(), consumer.split_off_left(), consumer);
+                let bridge = bridge_unindexed_producer_consumer;
+                let (left_result, right_result) = join_context(
+                    |context| bridge(context.migrated(), splitter, left_producer, left_consumer),
+                    |context| bridge(context.migrated(), splitter, right_producer, right_consumer),
+                );
+                reducer.reduce(left_result, right_result)
+            }
+            (producer, None) => producer.fold_with(consumer.into_folder()).complete(),
+        }
+    } else {
+        producer.fold_with(consumer.into_folder()).complete()
+    }
+}
diff --git a/src/iter/positions.rs b/src/iter/positions.rs
new file mode 100644
index 0000000..f584bb2
--- /dev/null
+++ b/src/iter/positions.rs
@@ -0,0 +1,137 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Positions` takes a predicate `predicate` and filters out elements that match,
+/// yielding their indices.
+///
+/// This struct is created by the [`positions()`] method on [`IndexedParallelIterator`]
+///
+/// [`positions()`]: trait.IndexedParallelIterator.html#method.positions
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Positions<I: IndexedParallelIterator, P> {
+    base: I,
+    predicate: P,
+}
+
+impl<I: IndexedParallelIterator + Debug, P> Debug for Positions<I, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Positions")
+            .field("base", &self.base)
+            .finish()
+    }
+}
+
+impl<I, P> Positions<I, P>
+where
+    I: IndexedParallelIterator,
+{
+    /// Create a new `Positions` iterator.
+    pub(super) fn new(base: I, predicate: P) -> Self {
+        Positions { base, predicate }
+    }
+}
+
+impl<I, P> ParallelIterator for Positions<I, P>
+where
+    I: IndexedParallelIterator,
+    P: Fn(I::Item) -> bool + Sync + Send,
+{
+    type Item = usize;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = PositionsConsumer::new(consumer, &self.predicate, 0);
+        self.base.drive(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct PositionsConsumer<'p, C, P> {
+    base: C,
+    predicate: &'p P,
+    offset: usize,
+}
+
+impl<'p, C, P> PositionsConsumer<'p, C, P> {
+    fn new(base: C, predicate: &'p P, offset: usize) -> Self {
+        PositionsConsumer {
+            base,
+            predicate,
+            offset,
+        }
+    }
+}
+
+impl<'p, T, C, P> Consumer<T> for PositionsConsumer<'p, C, P>
+where
+    C: Consumer<usize>,
+    P: Fn(T) -> bool + Sync,
+{
+    type Folder = PositionsFolder<'p, C::Folder, P>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, C::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            PositionsConsumer::new(left, self.predicate, self.offset),
+            PositionsConsumer::new(right, self.predicate, self.offset + index),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        PositionsFolder {
+            base: self.base.into_folder(),
+            predicate: self.predicate,
+            offset: self.offset,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+struct PositionsFolder<'p, F, P> {
+    base: F,
+    predicate: &'p P,
+    offset: usize,
+}
+
+impl<F, P, T> Folder<T> for PositionsFolder<'_, F, P>
+where
+    F: Folder<usize>,
+    P: Fn(T) -> bool,
+{
+    type Result = F::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let index = self.offset;
+        self.offset += 1;
+        if (self.predicate)(item) {
+            self.base = self.base.consume(index);
+        }
+        self
+    }
+
+    // This cannot easily specialize `consume_iter` to be better than
+    // the default, because that requires checking `self.base.full()`
+    // during a call to `self.base.consume_iter()`. (#632)
+
+    fn complete(self) -> Self::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
diff --git a/src/iter/product.rs b/src/iter/product.rs
new file mode 100644
index 0000000..a3d0727
--- /dev/null
+++ b/src/iter/product.rs
@@ -0,0 +1,114 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use std::iter::{self, Product};
+use std::marker::PhantomData;
+
+pub(super) fn product<PI, P>(pi: PI) -> P
+where
+    PI: ParallelIterator,
+    P: Send + Product<PI::Item> + Product,
+{
+    pi.drive_unindexed(ProductConsumer::new())
+}
+
+fn mul<T: Product>(left: T, right: T) -> T {
+    iter::once(left).chain(iter::once(right)).product()
+}
+
+struct ProductConsumer<P: Send> {
+    _marker: PhantomData<*const P>,
+}
+
+unsafe impl<P: Send> Send for ProductConsumer<P> {}
+
+impl<P: Send> ProductConsumer<P> {
+    fn new() -> ProductConsumer<P> {
+        ProductConsumer {
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<P, T> Consumer<T> for ProductConsumer<P>
+where
+    P: Send + Product<T> + Product,
+{
+    type Folder = ProductFolder<P>;
+    type Reducer = Self;
+    type Result = P;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (
+            ProductConsumer::new(),
+            ProductConsumer::new(),
+            ProductConsumer::new(),
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ProductFolder {
+            product: iter::empty::<T>().product(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<P, T> UnindexedConsumer<T> for ProductConsumer<P>
+where
+    P: Send + Product<T> + Product,
+{
+    fn split_off_left(&self) -> Self {
+        ProductConsumer::new()
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        ProductConsumer::new()
+    }
+}
+
+impl<P> Reducer<P> for ProductConsumer<P>
+where
+    P: Send + Product,
+{
+    fn reduce(self, left: P, right: P) -> P {
+        mul(left, right)
+    }
+}
+
+struct ProductFolder<P> {
+    product: P,
+}
+
+impl<P, T> Folder<T> for ProductFolder<P>
+where
+    P: Product<T> + Product,
+{
+    type Result = P;
+
+    fn consume(self, item: T) -> Self {
+        ProductFolder {
+            product: mul(self.product, iter::once(item).product()),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        ProductFolder {
+            product: mul(self.product, iter.into_iter().product()),
+        }
+    }
+
+    fn complete(self) -> P {
+        self.product
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/reduce.rs b/src/iter/reduce.rs
new file mode 100644
index 0000000..321b5dd
--- /dev/null
+++ b/src/iter/reduce.rs
@@ -0,0 +1,116 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+pub(super) fn reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    let consumer = ReduceConsumer {
+        identity: &identity,
+        reduce_op: &reduce_op,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct ReduceConsumer<'r, R, ID> {
+    identity: &'r ID,
+    reduce_op: &'r R,
+}
+
+impl<'r, R, ID> Copy for ReduceConsumer<'r, R, ID> {}
+
+impl<'r, R, ID> Clone for ReduceConsumer<'r, R, ID> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Consumer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    type Folder = ReduceFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = T;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: (self.identity)(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<'r, R, ID, T> UnindexedConsumer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+    ID: Fn() -> T + Sync,
+    T: Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Reducer<T> for ReduceConsumer<'r, R, ID>
+where
+    R: Fn(T, T) -> T + Sync,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        (self.reduce_op)(left, right)
+    }
+}
+
+struct ReduceFolder<'r, R, T> {
+    reduce_op: &'r R,
+    item: T,
+}
+
+impl<'r, R, T> Folder<T> for ReduceFolder<'r, R, T>
+where
+    R: Fn(T, T) -> T,
+{
+    type Result = T;
+
+    fn consume(self, item: T) -> Self {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: (self.reduce_op)(self.item, item),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        ReduceFolder {
+            reduce_op: self.reduce_op,
+            item: iter.into_iter().fold(self.item, self.reduce_op),
+        }
+    }
+
+    fn complete(self) -> T {
+        self.item
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/repeat.rs b/src/iter/repeat.rs
new file mode 100644
index 0000000..f84a6fe
--- /dev/null
+++ b/src/iter/repeat.rs
@@ -0,0 +1,241 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+use std::usize;
+
+/// Iterator adaptor for [the `repeat()` function](fn.repeat.html).
+#[derive(Debug, Clone)]
+pub struct Repeat<T: Clone + Send> {
+    element: T,
+}
+
+/// Creates a parallel iterator that endlessly repeats `elt` (by
+/// cloning it). Note that this iterator has "infinite" length, so
+/// typically you would want to use `zip` or `take` or some other
+/// means to shorten it, or consider using
+/// [the `repeatn()` function](fn.repeatn.html) instead.
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::repeat;
+/// let x: Vec<(i32, i32)> = repeat(22).zip(0..3).collect();
+/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]);
+/// ```
+pub fn repeat<T: Clone + Send>(elt: T) -> Repeat<T> {
+    Repeat { element: elt }
+}
+
+impl<T> Repeat<T>
+where
+    T: Clone + Send,
+{
+    /// Takes only `n` repeats of the element, similar to the general
+    /// [`take()`](trait.IndexedParallelIterator.html#method.take).
+    ///
+    /// The resulting `RepeatN` is an `IndexedParallelIterator`, allowing
+    /// more functionality than `Repeat` alone.
+    pub fn take(self, n: usize) -> RepeatN<T> {
+        repeatn(self.element, n)
+    }
+
+    /// Iterates tuples, repeating the element with items from another
+    /// iterator, similar to the general
+    /// [`zip()`](trait.IndexedParallelIterator.html#method.zip).
+    pub fn zip<Z>(self, zip_op: Z) -> Zip<RepeatN<T>, Z::Iter>
+    where
+        Z: IntoParallelIterator,
+        Z::Iter: IndexedParallelIterator,
+    {
+        let z = zip_op.into_par_iter();
+        let n = z.len();
+        self.take(n).zip(z)
+    }
+}
+
+impl<T> ParallelIterator for Repeat<T>
+where
+    T: Clone + Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = RepeatProducer {
+            element: self.element,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Unindexed producer for `Repeat`.
+struct RepeatProducer<T: Clone + Send> {
+    element: T,
+}
+
+impl<T: Clone + Send> UnindexedProducer for RepeatProducer<T> {
+    type Item = T;
+
+    fn split(self) -> (Self, Option<Self>) {
+        (
+            RepeatProducer {
+                element: self.element.clone(),
+            },
+            Some(RepeatProducer {
+                element: self.element,
+            }),
+        )
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<T>,
+    {
+        folder.consume_iter(iter::repeat(self.element))
+    }
+}
+
+/// Iterator adaptor for [the `repeatn()` function](fn.repeatn.html).
+#[derive(Debug, Clone)]
+pub struct RepeatN<T: Clone + Send> {
+    element: T,
+    count: usize,
+}
+
+/// Creates a parallel iterator that produces `n` repeats of `elt`
+/// (by cloning it).
+///
+/// # Examples
+///
+/// ```
+/// use rayon::prelude::*;
+/// use rayon::iter::repeatn;
+/// let x: Vec<(i32, i32)> = repeatn(22, 3).zip(0..3).collect();
+/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]);
+/// ```
+pub fn repeatn<T: Clone + Send>(elt: T, n: usize) -> RepeatN<T> {
+    RepeatN {
+        element: elt,
+        count: n,
+    }
+}
+
+impl<T> ParallelIterator for RepeatN<T>
+where
+    T: Clone + Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.count)
+    }
+}
+
+impl<T> IndexedParallelIterator for RepeatN<T>
+where
+    T: Clone + Send,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(RepeatNProducer {
+            element: self.element,
+            count: self.count,
+        })
+    }
+
+    fn len(&self) -> usize {
+        self.count
+    }
+}
+
+/// Producer for `RepeatN`.
+struct RepeatNProducer<T: Clone + Send> {
+    element: T,
+    count: usize,
+}
+
+impl<T: Clone + Send> Producer for RepeatNProducer<T> {
+    type Item = T;
+    type IntoIter = Iter<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        Iter {
+            element: self.element,
+            count: self.count,
+        }
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        (
+            RepeatNProducer {
+                element: self.element.clone(),
+                count: index,
+            },
+            RepeatNProducer {
+                element: self.element,
+                count: self.count - index,
+            },
+        )
+    }
+}
+
+/// Iterator for `RepeatN`.
+///
+/// This is conceptually like `std::iter::Take<std::iter::Repeat<T>>`, but
+/// we need `DoubleEndedIterator` and unconditional `ExactSizeIterator`.
+struct Iter<T: Clone> {
+    element: T,
+    count: usize,
+}
+
+impl<T: Clone> Iterator for Iter<T> {
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<T> {
+        if self.count > 0 {
+            self.count -= 1;
+            Some(self.element.clone())
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.count, Some(self.count))
+    }
+}
+
+impl<T: Clone> DoubleEndedIterator for Iter<T> {
+    #[inline]
+    fn next_back(&mut self) -> Option<T> {
+        self.next()
+    }
+}
+
+impl<T: Clone> ExactSizeIterator for Iter<T> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.count
+    }
+}
diff --git a/src/iter/rev.rs b/src/iter/rev.rs
new file mode 100644
index 0000000..a4c3b7c
--- /dev/null
+++ b/src/iter/rev.rs
@@ -0,0 +1,123 @@
+use super::plumbing::*;
+use super::*;
+use std::iter;
+
+/// `Rev` is an iterator that produces elements in reverse order. This struct
+/// is created by the [`rev()`] method on [`IndexedParallelIterator`]
+///
+/// [`rev()`]: trait.IndexedParallelIterator.html#method.rev
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Rev<I: IndexedParallelIterator> {
+    base: I,
+}
+
+impl<I> Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Rev` iterator.
+    pub(super) fn new(base: I) -> Self {
+        Rev { base }
+    }
+}
+
+impl<I> ParallelIterator for Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Rev<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback { callback, len });
+
+        struct Callback<CB> {
+            callback: CB,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = RevProducer {
+                    base,
+                    len: self.len,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+struct RevProducer<P> {
+    base: P,
+    len: usize,
+}
+
+impl<P> Producer for RevProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = iter::Rev<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().rev()
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(self.len - index);
+        (
+            RevProducer {
+                base: right,
+                len: index,
+            },
+            RevProducer {
+                base: left,
+                len: self.len - index,
+            },
+        )
+    }
+}
diff --git a/src/iter/skip.rs b/src/iter/skip.rs
new file mode 100644
index 0000000..9983f16
--- /dev/null
+++ b/src/iter/skip.rs
@@ -0,0 +1,88 @@
+use super::noop::NoopConsumer;
+use super::plumbing::*;
+use super::*;
+use std::cmp::min;
+
+/// `Skip` is an iterator that skips over the first `n` elements.
+/// This struct is created by the [`skip()`] method on [`IndexedParallelIterator`]
+///
+/// [`skip()`]: trait.IndexedParallelIterator.html#method.skip
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Skip<I> {
+    base: I,
+    n: usize,
+}
+
+impl<I> Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Skip` iterator.
+    pub(super) fn new(base: I, n: usize) -> Self {
+        let n = min(base.len(), n);
+        Skip { base, n }
+    }
+}
+
+impl<I> ParallelIterator for Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Skip<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn len(&self) -> usize {
+        self.base.len() - self.n
+    }
+
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            n: self.n,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            n: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let (before_skip, after_skip) = base.split_at(self.n);
+                bridge_producer_consumer(self.n, before_skip, NoopConsumer);
+                self.callback.callback(after_skip)
+            }
+        }
+    }
+}
diff --git a/src/iter/splitter.rs b/src/iter/splitter.rs
new file mode 100644
index 0000000..40935ac
--- /dev/null
+++ b/src/iter/splitter.rs
@@ -0,0 +1,174 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// The `split` function takes arbitrary data and a closure that knows how to
+/// split it, and turns this into a `ParallelIterator`.
+///
+/// # Examples
+///
+/// As a simple example, Rayon can recursively split ranges of indices
+///
+/// ```
+/// use rayon::iter;
+/// use rayon::prelude::*;
+/// use std::ops::Range;
+///
+///
+/// // We define a range of indices as follows
+/// type Range1D = Range<usize>;
+///
+/// // Splitting it in two can be done like this
+/// fn split_range1(r: Range1D) -> (Range1D, Option<Range1D>) {
+///     // We are mathematically unable to split the range if there is only
+///     // one point inside of it, but we could stop splitting before that.
+///     if r.end - r.start <= 1 { return (r, None); }
+///
+///     // Here, our range is considered large enough to be splittable
+///     let midpoint = r.start + (r.end - r.start) / 2;
+///     (r.start..midpoint, Some(midpoint..r.end))
+/// }
+///
+/// // By using iter::split, Rayon will split the range until it has enough work
+/// // to feed the CPU cores, then give us the resulting sub-ranges
+/// iter::split(0..4096, split_range1).for_each(|sub_range| {
+///     // As our initial range had a power-of-two size, the final sub-ranges
+///     // should have power-of-two sizes too
+///     assert!((sub_range.end - sub_range.start).is_power_of_two());
+/// });
+/// ```
+///
+/// This recursive splitting can be extended to two or three dimensions,
+/// to reproduce a classic "block-wise" parallelization scheme of graphics and
+/// numerical simulations:
+///
+/// ```
+/// # use rayon::iter;
+/// # use rayon::prelude::*;
+/// # use std::ops::Range;
+/// # type Range1D = Range<usize>;
+/// # fn split_range1(r: Range1D) -> (Range1D, Option<Range1D>) {
+/// #     if r.end - r.start <= 1 { return (r, None); }
+/// #     let midpoint = r.start + (r.end - r.start) / 2;
+/// #     (r.start..midpoint, Some(midpoint..r.end))
+/// # }
+/// #
+/// // A two-dimensional range of indices can be built out of two 1D ones
+/// struct Range2D {
+///     // Range of horizontal indices
+///     pub rx: Range1D,
+///
+///     // Range of vertical indices
+///     pub ry: Range1D,
+/// }
+///
+/// // We want to recursively split them by the largest dimension until we have
+/// // enough sub-ranges to feed our mighty multi-core CPU. This function
+/// // carries out one such split.
+/// fn split_range2(r2: Range2D) -> (Range2D, Option<Range2D>) {
+///     // Decide on which axis (horizontal/vertical) the range should be split
+///     let width = r2.rx.end - r2.rx.start;
+///     let height = r2.ry.end - r2.ry.start;
+///     if width >= height {
+///         // This is a wide range, split it on the horizontal axis
+///         let (split_rx, ry) = (split_range1(r2.rx), r2.ry);
+///         let out1 = Range2D {
+///             rx: split_rx.0,
+///             ry: ry.clone(),
+///         };
+///         let out2 = split_rx.1.map(|rx| Range2D { rx, ry });
+///         (out1, out2)
+///     } else {
+///         // This is a tall range, split it on the vertical axis
+///         let (rx, split_ry) = (r2.rx, split_range1(r2.ry));
+///         let out1 = Range2D {
+///             rx: rx.clone(),
+///             ry: split_ry.0,
+///         };
+///         let out2 = split_ry.1.map(|ry| Range2D { rx, ry, });
+///         (out1, out2)
+///     }
+/// }
+///
+/// // Again, rayon can handle the recursive splitting for us
+/// let range = Range2D { rx: 0..800, ry: 0..600 };
+/// iter::split(range, split_range2).for_each(|sub_range| {
+///     // If the sub-ranges were indeed split by the largest dimension, then
+///     // if no dimension was twice larger than the other initially, this
+///     // property will remain true in the final sub-ranges.
+///     let width = sub_range.rx.end - sub_range.rx.start;
+///     let height = sub_range.ry.end - sub_range.ry.start;
+///     assert!((width / 2 <= height) && (height / 2 <= width));
+/// });
+/// ```
+///
+pub fn split<D, S>(data: D, splitter: S) -> Split<D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync,
+{
+    Split { data, splitter }
+}
+
+/// `Split` is a parallel iterator using arbitrary data and a splitting function.
+/// This struct is created by the [`split()`] function.
+///
+/// [`split()`]: fn.split.html
+#[derive(Clone)]
+pub struct Split<D, S> {
+    data: D,
+    splitter: S,
+}
+
+impl<D: Debug, S> Debug for Split<D, S> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Split").field("data", &self.data).finish()
+    }
+}
+
+impl<D, S> ParallelIterator for Split<D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync + Send,
+{
+    type Item = D;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer {
+            data: self.data,
+            splitter: &self.splitter,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+struct SplitProducer<'a, D, S> {
+    data: D,
+    splitter: &'a S,
+}
+
+impl<'a, D, S> UnindexedProducer for SplitProducer<'a, D, S>
+where
+    D: Send,
+    S: Fn(D) -> (D, Option<D>) + Sync,
+{
+    type Item = D;
+
+    fn split(mut self) -> (Self, Option<Self>) {
+        let splitter = self.splitter;
+        let (left, right) = splitter(self.data);
+        self.data = left;
+        (self, right.map(|data| SplitProducer { data, splitter }))
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume(self.data)
+    }
+}
diff --git a/src/iter/step_by.rs b/src/iter/step_by.rs
new file mode 100644
index 0000000..2002f42
--- /dev/null
+++ b/src/iter/step_by.rs
@@ -0,0 +1,144 @@
+#![cfg(step_by)]
+use std::cmp::min;
+
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+use std::iter;
+use std::usize;
+
+/// `StepBy` is an iterator that skips `n` elements between each yield, where `n` is the given step.
+/// This struct is created by the [`step_by()`] method on [`IndexedParallelIterator`]
+///
+/// [`step_by()`]: trait.IndexedParallelIterator.html#method.step_by
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct StepBy<I: IndexedParallelIterator> {
+    base: I,
+    step: usize,
+}
+
+impl<I> StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `StepBy` iterator.
+    pub(super) fn new(base: I, step: usize) -> Self {
+        StepBy { base, step }
+    }
+}
+
+impl<I> ParallelIterator for StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for StepBy<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.base.len(), self.step)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        let len = self.base.len();
+        return self.base.with_producer(Callback {
+            callback,
+            step: self.step,
+            len,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            step: usize,
+            len: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = StepByProducer {
+                    base,
+                    step: self.step,
+                    len: self.len,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Producer implementation
+
+struct StepByProducer<P> {
+    base: P,
+    step: usize,
+    len: usize,
+}
+
+impl<P> Producer for StepByProducer<P>
+where
+    P: Producer,
+{
+    type Item = P::Item;
+    type IntoIter = iter::StepBy<P::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.base.into_iter().step_by(self.step)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = min(index * self.step, self.len);
+
+        let (left, right) = self.base.split_at(elem_index);
+        (
+            StepByProducer {
+                base: left,
+                step: self.step,
+                len: elem_index,
+            },
+            StepByProducer {
+                base: right,
+                step: self.step,
+                len: self.len - elem_index,
+            },
+        )
+    }
+
+    fn min_len(&self) -> usize {
+        div_round_up(self.base.min_len(), self.step)
+    }
+
+    fn max_len(&self) -> usize {
+        self.base.max_len() / self.step
+    }
+}
diff --git a/src/iter/sum.rs b/src/iter/sum.rs
new file mode 100644
index 0000000..a73e0bf
--- /dev/null
+++ b/src/iter/sum.rs
@@ -0,0 +1,110 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use std::iter::{self, Sum};
+use std::marker::PhantomData;
+
+pub(super) fn sum<PI, S>(pi: PI) -> S
+where
+    PI: ParallelIterator,
+    S: Send + Sum<PI::Item> + Sum,
+{
+    pi.drive_unindexed(SumConsumer::new())
+}
+
+fn add<T: Sum>(left: T, right: T) -> T {
+    iter::once(left).chain(iter::once(right)).sum()
+}
+
+struct SumConsumer<S: Send> {
+    _marker: PhantomData<*const S>,
+}
+
+unsafe impl<S: Send> Send for SumConsumer<S> {}
+
+impl<S: Send> SumConsumer<S> {
+    fn new() -> SumConsumer<S> {
+        SumConsumer {
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<S, T> Consumer<T> for SumConsumer<S>
+where
+    S: Send + Sum<T> + Sum,
+{
+    type Folder = SumFolder<S>;
+    type Reducer = Self;
+    type Result = S;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (SumConsumer::new(), SumConsumer::new(), SumConsumer::new())
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        SumFolder {
+            sum: iter::empty::<T>().sum(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
+
+impl<S, T> UnindexedConsumer<T> for SumConsumer<S>
+where
+    S: Send + Sum<T> + Sum,
+{
+    fn split_off_left(&self) -> Self {
+        SumConsumer::new()
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        SumConsumer::new()
+    }
+}
+
+impl<S> Reducer<S> for SumConsumer<S>
+where
+    S: Send + Sum,
+{
+    fn reduce(self, left: S, right: S) -> S {
+        add(left, right)
+    }
+}
+
+struct SumFolder<S> {
+    sum: S,
+}
+
+impl<S, T> Folder<T> for SumFolder<S>
+where
+    S: Sum<T> + Sum,
+{
+    type Result = S;
+
+    fn consume(self, item: T) -> Self {
+        SumFolder {
+            sum: add(self.sum, iter::once(item).sum()),
+        }
+    }
+
+    fn consume_iter<I>(self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        SumFolder {
+            sum: add(self.sum, iter.into_iter().sum()),
+        }
+    }
+
+    fn complete(self) -> S {
+        self.sum
+    }
+
+    fn full(&self) -> bool {
+        false
+    }
+}
diff --git a/src/iter/take.rs b/src/iter/take.rs
new file mode 100644
index 0000000..52d15d8
--- /dev/null
+++ b/src/iter/take.rs
@@ -0,0 +1,86 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp::min;
+
+/// `Take` is an iterator that iterates over the first `n` elements.
+/// This struct is created by the [`take()`] method on [`IndexedParallelIterator`]
+///
+/// [`take()`]: trait.IndexedParallelIterator.html#method.take
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Take<I> {
+    base: I,
+    n: usize,
+}
+
+impl<I> Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    /// Creates a new `Take` iterator.
+    pub(super) fn new(base: I, n: usize) -> Self {
+        let n = min(base.len(), n);
+        Take { base, n }
+    }
+}
+
+impl<I> ParallelIterator for Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<I> IndexedParallelIterator for Take<I>
+where
+    I: IndexedParallelIterator,
+{
+    fn len(&self) -> usize {
+        self.n
+    }
+
+    fn drive<C: Consumer<Self::Item>>(self, consumer: C) -> C::Result {
+        bridge(self, consumer)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            n: self.n,
+        });
+
+        struct Callback<CB> {
+            callback: CB,
+            n: usize,
+        }
+
+        impl<T, CB> ProducerCallback<T> for Callback<CB>
+        where
+            CB: ProducerCallback<T>,
+        {
+            type Output = CB::Output;
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let (producer, _) = base.split_at(self.n);
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
diff --git a/src/iter/test.rs b/src/iter/test.rs
new file mode 100644
index 0000000..bc5106b
--- /dev/null
+++ b/src/iter/test.rs
@@ -0,0 +1,2188 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use super::*;
+use crate::prelude::*;
+use rayon_core::*;
+
+use rand::distributions::Standard;
+use rand::{Rng, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use std::collections::LinkedList;
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BinaryHeap, VecDeque};
+use std::f64;
+use std::fmt::Debug;
+use std::sync::mpsc;
+use std::usize;
+
+fn is_indexed<T: IndexedParallelIterator>(_: T) {}
+
+fn seeded_rng() -> XorShiftRng {
+    let mut seed = <XorShiftRng as SeedableRng>::Seed::default();
+    (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i);
+    XorShiftRng::from_seed(seed)
+}
+
+#[test]
+fn execute() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b = vec![];
+    a.par_iter().map(|&i| i + 1).collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_cloned() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b: Vec<i32> = vec![];
+    a.par_iter().cloned().collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_range() {
+    let a = 0i32..1024;
+    let mut b = vec![];
+    a.into_par_iter().map(|i| i + 1).collect_into_vec(&mut b);
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_unindexed_range() {
+    let a = 0i64..1024;
+    let b: LinkedList<i64> = a.into_par_iter().map(|i| i + 1).collect();
+    let c: LinkedList<i64> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn execute_pseudo_indexed_range() {
+    use std::i128::MAX;
+    let range = MAX - 1024..MAX;
+
+    // Given `Some` length, collecting `Vec` will try to act indexed.
+    let a = range.clone().into_par_iter();
+    assert_eq!(a.opt_len(), Some(1024));
+
+    let b: Vec<i128> = a.map(|i| i + 1).collect();
+    let c: Vec<i128> = range.map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn check_map_indexed() {
+    let a = [1, 2, 3];
+    is_indexed(a.par_iter().map(|x| x));
+}
+
+#[test]
+fn map_sum() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1: i32 = a.par_iter().map(|&i| i + 1).sum();
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, r2);
+}
+
+#[test]
+fn map_reduce() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1 = a.par_iter().map(|&i| i + 1).reduce(|| 0, |i, j| i + j);
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, r2);
+}
+
+#[test]
+fn map_reduce_with() {
+    let a: Vec<i32> = (0..1024).collect();
+    let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j);
+    let r2 = a.iter().map(|&i| i + 1).sum();
+    assert_eq!(r1, Some(r2));
+}
+
+#[test]
+fn fold_map_reduce() {
+    // Kind of a weird test, but it demonstrates various
+    // transformations that are taking place. Relies on
+    // `with_max_len(1).fold()` being equivalent to `map()`.
+    //
+    // Take each number from 0 to 32 and fold them by appending to a
+    // vector.  Because of `with_max_len(1)`, this will produce 32 vectors,
+    // each with one item.  We then collect all of these into an
+    // individual vector by mapping each into their own vector (so we
+    // have Vec<Vec<i32>>) and then reducing those into a single
+    // vector.
+    let r1 = (0_i32..32)
+        .into_par_iter()
+        .with_max_len(1)
+        .fold(
+            || vec![],
+            |mut v, e| {
+                v.push(e);
+                v
+            },
+        )
+        .map(|v| vec![v])
+        .reduce_with(|mut v_a, v_b| {
+            v_a.extend(v_b);
+            v_a
+        });
+    assert_eq!(
+        r1,
+        Some(vec![
+            vec![0],
+            vec![1],
+            vec![2],
+            vec![3],
+            vec![4],
+            vec![5],
+            vec![6],
+            vec![7],
+            vec![8],
+            vec![9],
+            vec![10],
+            vec![11],
+            vec![12],
+            vec![13],
+            vec![14],
+            vec![15],
+            vec![16],
+            vec![17],
+            vec![18],
+            vec![19],
+            vec![20],
+            vec![21],
+            vec![22],
+            vec![23],
+            vec![24],
+            vec![25],
+            vec![26],
+            vec![27],
+            vec![28],
+            vec![29],
+            vec![30],
+            vec![31]
+        ])
+    );
+}
+
+#[test]
+fn fold_is_full() {
+    let counter = AtomicUsize::new(0);
+    let a = (0_i32..2048)
+        .into_par_iter()
+        .inspect(|_| {
+            counter.fetch_add(1, Ordering::SeqCst);
+        })
+        .fold(|| 0, |a, b| a + b)
+        .find_any(|_| true);
+    assert!(a.is_some());
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn check_step_by() {
+    let a: Vec<i32> = (0..1024).step_by(2).collect();
+    let b: Vec<i32> = (0..1024).into_par_iter().step_by(2).collect();
+
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_step_by_unaligned() {
+    let a: Vec<i32> = (0..1029).step_by(10).collect();
+    let b: Vec<i32> = (0..1029).into_par_iter().step_by(10).collect();
+
+    assert_eq!(a, b)
+}
+
+#[test]
+fn check_step_by_rev() {
+    let a: Vec<i32> = (0..1024).step_by(2).rev().collect();
+    let b: Vec<i32> = (0..1024).into_par_iter().step_by(2).rev().collect();
+
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_enumerate() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+
+    let mut b = vec![];
+    a.par_iter()
+        .enumerate()
+        .map(|(i, &x)| i + x)
+        .collect_into_vec(&mut b);
+    assert!(b.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_enumerate_rev() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+
+    let mut b = vec![];
+    a.par_iter()
+        .enumerate()
+        .rev()
+        .map(|(i, &x)| i + x)
+        .collect_into_vec(&mut b);
+    assert!(b.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_indices_after_enumerate_split() {
+    let a: Vec<i32> = (0..1024).collect();
+    a.par_iter().enumerate().with_producer(WithProducer);
+
+    struct WithProducer;
+    impl<'a> ProducerCallback<(usize, &'a i32)> for WithProducer {
+        type Output = ();
+        fn callback<P>(self, producer: P)
+        where
+            P: Producer<Item = (usize, &'a i32)>,
+        {
+            let (a, b) = producer.split_at(512);
+            for ((index, value), trusted_index) in a.into_iter().zip(0..) {
+                assert_eq!(index, trusted_index);
+                assert_eq!(index, *value as usize);
+            }
+            for ((index, value), trusted_index) in b.into_iter().zip(512..) {
+                assert_eq!(index, trusted_index);
+                assert_eq!(index, *value as usize);
+            }
+        }
+    }
+}
+
+#[test]
+fn check_increment() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut().enumerate().for_each(|(i, v)| *v += i);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_skip() {
+    let a: Vec<usize> = (0..1024).collect();
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(16).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(16).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(2048).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(2048).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().skip(0).collect_into_vec(&mut v1);
+    let v2 = a.iter().skip(0).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    // Check that the skipped elements side effects are executed
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    let num = AtomicUsize::new(0);
+    a.par_iter()
+        .map(|&n| num.fetch_add(n, Ordering::Relaxed))
+        .skip(512)
+        .count();
+    assert_eq!(num.load(Ordering::Relaxed), a.iter().sum::<usize>());
+}
+
+#[test]
+fn check_take() {
+    let a: Vec<usize> = (0..1024).collect();
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(16).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(16).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(2048).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(2048).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+
+    let mut v1 = Vec::new();
+    a.par_iter().take(0).collect_into_vec(&mut v1);
+    let v2 = a.iter().take(0).collect::<Vec<_>>();
+    assert_eq!(v1, v2);
+}
+
+#[test]
+fn check_inspect() {
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    let a = AtomicUsize::new(0);
+    let b: usize = (0_usize..1024)
+        .into_par_iter()
+        .inspect(|&i| {
+            a.fetch_add(i, Ordering::Relaxed);
+        })
+        .sum();
+
+    assert_eq!(a.load(Ordering::Relaxed), b);
+}
+
+#[test]
+fn check_move() {
+    let a = vec![vec![1, 2, 3]];
+    let ptr = a[0].as_ptr();
+
+    let mut b = vec![];
+    a.into_par_iter().collect_into_vec(&mut b);
+
+    // a simple move means the inner vec will be completely unchanged
+    assert_eq!(ptr, b[0].as_ptr());
+}
+
+#[test]
+fn check_drops() {
+    use std::sync::atomic::{AtomicUsize, Ordering};
+
+    let c = AtomicUsize::new(0);
+    let a = vec![DropCounter(&c); 10];
+
+    let mut b = vec![];
+    a.clone().into_par_iter().collect_into_vec(&mut b);
+    assert_eq!(c.load(Ordering::Relaxed), 0);
+
+    b.into_par_iter();
+    assert_eq!(c.load(Ordering::Relaxed), 10);
+
+    a.into_par_iter().with_producer(Partial);
+    assert_eq!(c.load(Ordering::Relaxed), 20);
+
+    #[derive(Clone)]
+    struct DropCounter<'a>(&'a AtomicUsize);
+    impl<'a> Drop for DropCounter<'a> {
+        fn drop(&mut self) {
+            self.0.fetch_add(1, Ordering::Relaxed);
+        }
+    }
+
+    struct Partial;
+    impl<'a> ProducerCallback<DropCounter<'a>> for Partial {
+        type Output = ();
+        fn callback<P>(self, producer: P)
+        where
+            P: Producer<Item = DropCounter<'a>>,
+        {
+            let (a, _) = producer.split_at(5);
+            a.into_iter().next();
+        }
+    }
+}
+
+#[test]
+fn check_slice_indexed() {
+    let a = vec![1, 2, 3];
+    is_indexed(a.par_iter());
+}
+
+#[test]
+fn check_slice_mut_indexed() {
+    let mut a = vec![1, 2, 3];
+    is_indexed(a.par_iter_mut());
+}
+
+#[test]
+fn check_vec_indexed() {
+    let a = vec![1, 2, 3];
+    is_indexed(a.clone().into_par_iter());
+}
+
+#[test]
+fn check_range_indexed() {
+    is_indexed((1..5).into_par_iter());
+}
+
+#[test]
+fn check_cmp_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Equal);
+}
+
+#[test]
+fn check_cmp_to_seq() {
+    assert_eq!(
+        (0..1024).into_par_iter().cmp(0..1024),
+        (0..1024).cmp(0..1024)
+    );
+}
+
+#[test]
+fn check_cmp_rng_to_seq() {
+    let mut rng = seeded_rng();
+    let rng = &mut rng;
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    let b: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..a.len() {
+        let par_result = a[i..].par_iter().cmp(b[i..].par_iter());
+        let seq_result = a[i..].iter().cmp(b[i..].iter());
+
+        assert_eq!(par_result, seq_result);
+    }
+}
+
+#[test]
+fn check_cmp_lt_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Less);
+}
+
+#[test]
+fn check_cmp_lt_to_seq() {
+    assert_eq!(
+        (0..1024).into_par_iter().cmp(1..1024),
+        (0..1024).cmp(1..1024)
+    )
+}
+
+#[test]
+fn check_cmp_gt_direct() {
+    let a = (1..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.cmp(b);
+
+    assert!(result == ::std::cmp::Ordering::Greater);
+}
+
+#[test]
+fn check_cmp_gt_to_seq() {
+    assert_eq!(
+        (1..1024).into_par_iter().cmp(0..1024),
+        (1..1024).cmp(0..1024)
+    )
+}
+
+#[test]
+fn check_cmp_short_circuit() {
+    // We only use a single thread in order to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0; 1024];
+    let mut b = a.clone();
+    b[42] = 1;
+
+    pool.install(|| {
+        let expected = ::std::cmp::Ordering::Less;
+        assert_eq!(a.par_iter().cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_short_circuit() {
+    // We only use a single thread to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0; 1024];
+    let mut b = a.clone();
+    b[42] = 1;
+
+    pool.install(|| {
+        let expected = Some(::std::cmp::Ordering::Less);
+        assert_eq!(a.par_iter().partial_cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .partial_cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_nan_short_circuit() {
+    // We only use a single thread to make the short-circuit behavior deterministic.
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let a = vec![0.0; 1024];
+    let mut b = a.clone();
+    b[42] = f64::NAN;
+
+    pool.install(|| {
+        let expected = None;
+        assert_eq!(a.par_iter().partial_cmp(&b), expected);
+
+        for len in 1..10 {
+            let counter = AtomicUsize::new(0);
+            let result = a
+                .par_iter()
+                .with_max_len(len)
+                .inspect(|_| {
+                    counter.fetch_add(1, Ordering::SeqCst);
+                })
+                .partial_cmp(&b);
+            assert_eq!(result, expected);
+            // should not have visited every single one
+            assert!(counter.into_inner() < a.len());
+        }
+    });
+}
+
+#[test]
+fn check_partial_cmp_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Equal));
+}
+
+#[test]
+fn check_partial_cmp_to_seq() {
+    let par_result = (0..1024).into_par_iter().partial_cmp(0..1024);
+    let seq_result = (0..1024).partial_cmp(0..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_rng_to_seq() {
+    let mut rng = seeded_rng();
+    let rng = &mut rng;
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    let b: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..a.len() {
+        let par_result = a[i..].par_iter().partial_cmp(b[i..].par_iter());
+        let seq_result = a[i..].iter().partial_cmp(b[i..].iter());
+
+        assert_eq!(par_result, seq_result);
+    }
+}
+
+#[test]
+fn check_partial_cmp_lt_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Less));
+}
+
+#[test]
+fn check_partial_cmp_lt_to_seq() {
+    let par_result = (0..1024).into_par_iter().partial_cmp(1..1024);
+    let seq_result = (0..1024).partial_cmp(1..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_gt_direct() {
+    let a = (1..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.partial_cmp(b);
+
+    assert!(result == Some(::std::cmp::Ordering::Greater));
+}
+
+#[test]
+fn check_partial_cmp_gt_to_seq() {
+    let par_result = (1..1024).into_par_iter().partial_cmp(0..1024);
+    let seq_result = (1..1024).partial_cmp(0..1024);
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_none_direct() {
+    let a = vec![f64::NAN, 0.0];
+    let b = vec![0.0, 1.0];
+
+    let result = a.par_iter().partial_cmp(b.par_iter());
+
+    assert!(result == None);
+}
+
+#[test]
+fn check_partial_cmp_none_to_seq() {
+    let a = vec![f64::NAN, 0.0];
+    let b = vec![0.0, 1.0];
+
+    let par_result = a.par_iter().partial_cmp(b.par_iter());
+    let seq_result = a.iter().partial_cmp(b.iter());
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_partial_cmp_late_nan_direct() {
+    let a = vec![0.0, f64::NAN];
+    let b = vec![1.0, 1.0];
+
+    let result = a.par_iter().partial_cmp(b.par_iter());
+
+    assert!(result == Some(::std::cmp::Ordering::Less));
+}
+
+#[test]
+fn check_partial_cmp_late_nane_to_seq() {
+    let a = vec![0.0, f64::NAN];
+    let b = vec![1.0, 1.0];
+
+    let par_result = a.par_iter().partial_cmp(b.par_iter());
+    let seq_result = a.iter().partial_cmp(b.iter());
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_cmp_lengths() {
+    // comparisons should consider length if they are otherwise equal
+    let a = vec![0; 1024];
+    let b = vec![0; 1025];
+
+    assert_eq!(a.par_iter().cmp(&b), a.iter().cmp(&b));
+    assert_eq!(a.par_iter().partial_cmp(&b), a.iter().partial_cmp(&b));
+}
+
+#[test]
+fn check_eq_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (0..1024).into_par_iter();
+
+    let result = a.eq(b);
+
+    assert!(result);
+}
+
+#[test]
+fn check_eq_to_seq() {
+    let par_result = (0..1024).into_par_iter().eq((0..1024).into_par_iter());
+    let seq_result = (0..1024).eq(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ne_direct() {
+    let a = (0..1024).into_par_iter();
+    let b = (1..1024).into_par_iter();
+
+    let result = a.ne(b);
+
+    assert!(result);
+}
+
+#[test]
+fn check_ne_to_seq() {
+    let par_result = (0..1024).into_par_iter().ne((1..1025).into_par_iter());
+    let seq_result = (0..1024).ne(1..1025);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ne_lengths() {
+    // equality should consider length too
+    let a = vec![0; 1024];
+    let b = vec![0; 1025];
+
+    assert_eq!(a.par_iter().eq(&b), a.iter().eq(&b));
+    assert_eq!(a.par_iter().ne(&b), a.iter().ne(&b));
+}
+
+#[test]
+fn check_lt_direct() {
+    assert!((0..1024).into_par_iter().lt(1..1024));
+    assert!(!(1..1024).into_par_iter().lt(0..1024));
+}
+
+#[test]
+fn check_lt_to_seq() {
+    let par_result = (0..1024).into_par_iter().lt((1..1024).into_par_iter());
+    let seq_result = (0..1024).lt(1..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_le_equal_direct() {
+    assert!((0..1024).into_par_iter().le((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_le_equal_to_seq() {
+    let par_result = (0..1024).into_par_iter().le((0..1024).into_par_iter());
+    let seq_result = (0..1024).le(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_le_less_direct() {
+    assert!((0..1024).into_par_iter().le((1..1024).into_par_iter()));
+}
+
+#[test]
+fn check_le_less_to_seq() {
+    let par_result = (0..1024).into_par_iter().le((1..1024).into_par_iter());
+    let seq_result = (0..1024).le(1..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_gt_direct() {
+    assert!((1..1024).into_par_iter().gt((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_gt_to_seq() {
+    let par_result = (1..1024).into_par_iter().gt((0..1024).into_par_iter());
+    let seq_result = (1..1024).gt(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ge_equal_direct() {
+    assert!((0..1024).into_par_iter().ge((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_ge_equal_to_seq() {
+    let par_result = (0..1024).into_par_iter().ge((0..1024).into_par_iter());
+    let seq_result = (0..1024).ge(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_ge_greater_direct() {
+    assert!((1..1024).into_par_iter().ge((0..1024).into_par_iter()));
+}
+
+#[test]
+fn check_ge_greater_to_seq() {
+    let par_result = (1..1024).into_par_iter().ge((0..1024).into_par_iter());
+    let seq_result = (1..1024).ge(0..1024);
+
+    assert_eq!(par_result, seq_result);
+}
+
+#[test]
+fn check_zip() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut().zip(&b[..]).for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_into_par_iter() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut()
+        .zip(&b) // here we rely on &b iterating over &usize
+        .for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_into_mut_par_iter() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let mut b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter().zip(&mut b).for_each(|(&a, b)| *b += a);
+
+    assert!(b.iter().all(|&x| x == b.len() - 1));
+}
+
+#[test]
+fn check_zip_range() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut()
+        .zip(0usize..1024)
+        .for_each(|(a, b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut().zip_eq(&b[..]).for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_into_par_iter() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter_mut()
+        .zip_eq(&b) // here we rely on &b iterating over &usize
+        .for_each(|(a, &b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_into_mut_par_iter() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let mut b: Vec<usize> = (0..1024).collect();
+
+    a.par_iter().zip_eq(&mut b).for_each(|(&a, b)| *b += a);
+
+    assert!(b.iter().all(|&x| x == b.len() - 1));
+}
+
+#[test]
+fn check_zip_eq_range() {
+    let mut a: Vec<usize> = (0..1024).rev().collect();
+
+    a.par_iter_mut()
+        .zip_eq(0usize..1024)
+        .for_each(|(a, b)| *a += b);
+
+    assert!(a.iter().all(|&x| x == a.len() - 1));
+}
+
+#[test]
+fn check_sum_filtered_ints() {
+    let a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let par_sum_evens: i32 = a.par_iter().filter(|&x| (x & 1) == 0).sum();
+    let seq_sum_evens = a.iter().filter(|&x| (x & 1) == 0).sum();
+    assert_eq!(par_sum_evens, seq_sum_evens);
+}
+
+#[test]
+fn check_sum_filtermap_ints() {
+    let a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let par_sum_evens: u32 = a
+        .par_iter()
+        .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None })
+        .sum();
+    let seq_sum_evens = a
+        .iter()
+        .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None })
+        .sum();
+    assert_eq!(par_sum_evens, seq_sum_evens);
+}
+
+#[test]
+fn check_flat_map_nested_ranges() {
+    // FIXME -- why are precise type hints required on the integers here?
+
+    let v: i32 = (0_i32..10)
+        .into_par_iter()
+        .flat_map(|i| (0_i32..10).into_par_iter().map(move |j| (i, j)))
+        .map(|(i, j)| i * j)
+        .sum();
+
+    let w = (0_i32..10)
+        .flat_map(|i| (0_i32..10).map(move |j| (i, j)))
+        .map(|(i, j)| i * j)
+        .sum();
+
+    assert_eq!(v, w);
+}
+
+#[test]
+fn check_empty_flat_map_sum() {
+    let a: Vec<i32> = (0..1024).collect();
+    let empty = &a[..0];
+
+    // empty on the inside
+    let b: i32 = a.par_iter().flat_map(|_| empty).sum();
+    assert_eq!(b, 0);
+
+    // empty on the outside
+    let c: i32 = empty.par_iter().flat_map(|_| a.par_iter()).sum();
+    assert_eq!(c, 0);
+}
+
+#[test]
+fn check_flatten_vec() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: Vec<Vec<i32>> = vec![a.clone(), a.clone(), a.clone(), a.clone()];
+    let c: Vec<i32> = b.par_iter().flatten().cloned().collect();
+    let mut d = a.clone();
+    d.extend(&a);
+    d.extend(&a);
+    d.extend(&a);
+
+    assert_eq!(d, c);
+}
+
+#[test]
+fn check_flatten_vec_empty() {
+    let a: Vec<Vec<i32>> = vec![vec![]];
+    let b: Vec<i32> = a.par_iter().flatten().cloned().collect();
+
+    assert_eq!(vec![] as Vec<i32>, b);
+}
+
+#[test]
+fn check_slice_split() {
+    let v: Vec<_> = (0..1000).collect();
+    for m in 1..100 {
+        let a: Vec<_> = v.split(|x| x % m == 0).collect();
+        let b: Vec<_> = v.par_split(|x| x % m == 0).collect();
+        assert_eq!(a, b);
+    }
+
+    // same as std::slice::split() examples
+    let slice = [10, 40, 33, 20];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..2], &slice[3..]]);
+
+    let slice = [10, 40, 33];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..2], &slice[..0]]);
+
+    let slice = [10, 6, 33, 20];
+    let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect();
+    assert_eq!(v, &[&slice[..1], &slice[..0], &slice[3..]]);
+}
+
+#[test]
+fn check_slice_split_mut() {
+    let mut v1: Vec<_> = (0..1000).collect();
+    let mut v2 = v1.clone();
+    for m in 1..100 {
+        let a: Vec<_> = v1.split_mut(|x| x % m == 0).collect();
+        let b: Vec<_> = v2.par_split_mut(|x| x % m == 0).collect();
+        assert_eq!(a, b);
+    }
+
+    // same as std::slice::split_mut() example
+    let mut v = [10, 40, 30, 20, 60, 50];
+    v.par_split_mut(|num| num % 3 == 0).for_each(|group| {
+        group[0] = 1;
+    });
+    assert_eq!(v, [1, 40, 30, 1, 60, 1]);
+}
+
+#[test]
+fn check_chunks() {
+    let a: Vec<i32> = vec![1, 5, 10, 4, 100, 3, 1000, 2, 10000, 1];
+    let par_sum_product_pairs: i32 = a.par_chunks(2).map(|c| c.iter().product::<i32>()).sum();
+    let seq_sum_product_pairs = a.chunks(2).map(|c| c.iter().product::<i32>()).sum();
+    assert_eq!(par_sum_product_pairs, 12345);
+    assert_eq!(par_sum_product_pairs, seq_sum_product_pairs);
+
+    let par_sum_product_triples: i32 = a.par_chunks(3).map(|c| c.iter().product::<i32>()).sum();
+    let seq_sum_product_triples = a.chunks(3).map(|c| c.iter().product::<i32>()).sum();
+    assert_eq!(par_sum_product_triples, 5_0 + 12_00 + 20_000_000 + 1);
+    assert_eq!(par_sum_product_triples, seq_sum_product_triples);
+}
+
+#[test]
+fn check_chunks_mut() {
+    let mut a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let mut b: Vec<i32> = a.clone();
+    a.par_chunks_mut(2).for_each(|c| c[0] = c.iter().sum());
+    b.chunks_mut(2).for_each(|c| c[0] = c.iter().sum());
+    assert_eq!(a, &[3, 2, 7, 4, 11, 6, 15, 8, 19, 10]);
+    assert_eq!(a, b);
+
+    let mut a: Vec<i32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+    let mut b: Vec<i32> = a.clone();
+    a.par_chunks_mut(3).for_each(|c| c[0] = c.iter().sum());
+    b.chunks_mut(3).for_each(|c| c[0] = c.iter().sum());
+    assert_eq!(a, &[6, 2, 3, 15, 5, 6, 24, 8, 9, 10]);
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_windows() {
+    let a: Vec<i32> = (0..1024).collect();
+    let par: Vec<_> = a.par_windows(2).collect();
+    let seq: Vec<_> = a.windows(2).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a.par_windows(100).collect();
+    let seq: Vec<_> = a.windows(100).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a.par_windows(1_000_000).collect();
+    let seq: Vec<_> = a.windows(1_000_000).collect();
+    assert_eq!(par, seq);
+
+    let par: Vec<_> = a
+        .par_windows(2)
+        .chain(a.par_windows(1_000_000))
+        .zip(a.par_windows(2))
+        .collect();
+    let seq: Vec<_> = a
+        .windows(2)
+        .chain(a.windows(1_000_000))
+        .zip(a.windows(2))
+        .collect();
+    assert_eq!(par, seq);
+}
+
+#[test]
+fn check_options() {
+    let mut a = vec![None, Some(1), None, None, Some(2), Some(4)];
+
+    assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::<i32>());
+    assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::<i32>());
+
+    a.par_iter_mut()
+        .flat_map(|opt| opt)
+        .for_each(|x| *x = *x * *x);
+
+    assert_eq!(21, a.into_par_iter().flat_map(|opt| opt).sum::<i32>());
+}
+
+#[test]
+fn check_results() {
+    let mut a = vec![Err(()), Ok(1i32), Err(()), Err(()), Ok(2), Ok(4)];
+
+    assert_eq!(7, a.par_iter().flat_map(|res| res).sum::<i32>());
+
+    assert_eq!(Err::<i32, ()>(()), a.par_iter().cloned().sum());
+    assert_eq!(Ok(7), a.par_iter().cloned().filter(Result::is_ok).sum());
+
+    assert_eq!(Err::<i32, ()>(()), a.par_iter().cloned().product());
+    assert_eq!(Ok(8), a.par_iter().cloned().filter(Result::is_ok).product());
+
+    a.par_iter_mut()
+        .flat_map(|res| res)
+        .for_each(|x| *x = *x * *x);
+
+    assert_eq!(21, a.into_par_iter().flat_map(|res| res).sum::<i32>());
+}
+
+#[test]
+fn check_binary_heap() {
+    use std::collections::BinaryHeap;
+
+    let a: BinaryHeap<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_btree_map() {
+    use std::collections::BTreeMap;
+
+    let mut a: BTreeMap<i32, i32> = (0..10).map(|i| (i, -i)).collect();
+
+    assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::<i32>());
+    assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::<i32>());
+
+    a.par_iter_mut().for_each(|(k, v)| *v += *k);
+
+    assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::<i32>());
+}
+
+#[test]
+fn check_btree_set() {
+    use std::collections::BTreeSet;
+
+    let a: BTreeSet<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_hash_map() {
+    use std::collections::HashMap;
+
+    let mut a: HashMap<i32, i32> = (0..10).map(|i| (i, -i)).collect();
+
+    assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::<i32>());
+    assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::<i32>());
+
+    a.par_iter_mut().for_each(|(k, v)| *v += *k);
+
+    assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::<i32>());
+}
+
+#[test]
+fn check_hash_set() {
+    use std::collections::HashSet;
+
+    let a: HashSet<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+    assert_eq!(45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_linked_list() {
+    use std::collections::LinkedList;
+
+    let mut a: LinkedList<i32> = (0..10).collect();
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+
+    a.par_iter_mut().for_each(|x| *x = -*x);
+
+    assert_eq!(-45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_vec_deque() {
+    use std::collections::VecDeque;
+
+    let mut a: VecDeque<i32> = (0..10).collect();
+
+    // try to get it to wrap around
+    a.drain(..5);
+    a.extend(0..5);
+
+    assert_eq!(45, a.par_iter().sum::<i32>());
+
+    a.par_iter_mut().for_each(|x| *x = -*x);
+
+    assert_eq!(-45, a.into_par_iter().sum::<i32>());
+}
+
+#[test]
+fn check_chain() {
+    let mut res = vec![];
+
+    // stays indexed in the face of madness
+    Some(0)
+        .into_par_iter()
+        .chain(Ok::<_, ()>(1))
+        .chain(1..4)
+        .chain(Err("huh?"))
+        .chain(None)
+        .chain(vec![5, 8, 13])
+        .map(|x| (x as u8 + b'a') as char)
+        .chain(vec!['x', 'y', 'z'])
+        .zip((0i32..1000).into_par_iter().map(|x| -x))
+        .enumerate()
+        .map(|(a, (b, c))| (a, b, c))
+        .chain(None)
+        .collect_into_vec(&mut res);
+
+    assert_eq!(
+        res,
+        vec![
+            (0, 'a', 0),
+            (1, 'b', -1),
+            (2, 'b', -2),
+            (3, 'c', -3),
+            (4, 'd', -4),
+            (5, 'f', -5),
+            (6, 'i', -6),
+            (7, 'n', -7),
+            (8, 'x', -8),
+            (9, 'y', -9),
+            (10, 'z', -10)
+        ]
+    );
+
+    // unindexed is ok too
+    let res: Vec<i32> = Some(1i32)
+        .into_par_iter()
+        .chain(
+            (2i32..4)
+                .into_par_iter()
+                .chain(vec![5, 6, 7, 8, 9])
+                .chain(Some((10, 100)).into_par_iter().flat_map(|(a, b)| a..b))
+                .filter(|x| x & 1 == 1),
+        )
+        .collect();
+    let other: Vec<i32> = (0..100).filter(|x| x & 1 == 1).collect();
+    assert_eq!(res, other);
+
+    // chain collect is ok with the "fake" specialization
+    let res: Vec<i32> = Some(1i32).into_par_iter().chain(None).collect();
+    assert_eq!(res, &[1]);
+}
+
+#[test]
+fn check_count() {
+    let c0 = (0_u32..24 * 1024).filter(|i| i % 2 == 0).count();
+    let c1 = (0_u32..24 * 1024)
+        .into_par_iter()
+        .filter(|i| i % 2 == 0)
+        .count();
+    assert_eq!(c0, c1);
+}
+
+#[test]
+fn find_any() {
+    let a: Vec<i32> = (0..1024).collect();
+
+    assert!(a.par_iter().find_any(|&&x| x % 42 == 41).is_some());
+    assert_eq!(
+        a.par_iter().find_any(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_any(|&&x| x < 0), None);
+
+    assert!(a.par_iter().position_any(|&x| x % 42 == 41).is_some());
+    assert_eq!(
+        a.par_iter().position_any(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_any(|&x| x < 0), None);
+
+    assert!(a.par_iter().any(|&x| x > 1000));
+    assert!(!a.par_iter().any(|&x| x < 0));
+
+    assert!(!a.par_iter().all(|&x| x > 1000));
+    assert!(a.par_iter().all(|&x| x >= 0));
+}
+
+#[test]
+fn find_first_or_last() {
+    let a: Vec<i32> = (0..1024).collect();
+
+    assert_eq!(a.par_iter().find_first(|&&x| x % 42 == 41), Some(&41_i32));
+    assert_eq!(
+        a.par_iter().find_first(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_first(|&&x| x < 0), None);
+
+    assert_eq!(
+        a.par_iter().position_first(|&x| x % 42 == 41),
+        Some(41_usize)
+    );
+    assert_eq!(
+        a.par_iter().position_first(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_first(|&x| x < 0), None);
+
+    assert_eq!(a.par_iter().find_last(|&&x| x % 42 == 41), Some(&1007_i32));
+    assert_eq!(
+        a.par_iter().find_last(|&&x| x % 19 == 1 && x % 53 == 0),
+        Some(&742_i32)
+    );
+    assert_eq!(a.par_iter().find_last(|&&x| x < 0), None);
+
+    assert_eq!(
+        a.par_iter().position_last(|&x| x % 42 == 41),
+        Some(1007_usize)
+    );
+    assert_eq!(
+        a.par_iter().position_last(|&x| x % 19 == 1 && x % 53 == 0),
+        Some(742_usize)
+    );
+    assert_eq!(a.par_iter().position_last(|&x| x < 0), None);
+}
+
+#[test]
+fn find_map_first_or_last_or_any() {
+    let mut a: Vec<i32> = vec![];
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_first(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_last(half_if_positive).is_none());
+
+    a = (-1024..-3).collect();
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_first(half_if_positive).is_none());
+    assert!(a.par_iter().find_map_last(half_if_positive).is_none());
+
+    assert!(a.par_iter().find_map_any(half_if_negative).is_some());
+    assert_eq!(
+        a.par_iter().find_map_first(half_if_negative),
+        Some(-512_i32)
+    );
+    assert_eq!(a.par_iter().find_map_last(half_if_negative), Some(-2_i32));
+
+    a.append(&mut (2..1025).collect());
+
+    assert!(a.par_iter().find_map_any(half_if_positive).is_some());
+    assert_eq!(a.par_iter().find_map_first(half_if_positive), Some(1_i32));
+    assert_eq!(a.par_iter().find_map_last(half_if_positive), Some(512_i32));
+
+    fn half_if_positive(x: &i32) -> Option<i32> {
+        if *x > 0 {
+            Some(x / 2)
+        } else {
+            None
+        }
+    }
+
+    fn half_if_negative(x: &i32) -> Option<i32> {
+        if *x < 0 {
+            Some(x / 2)
+        } else {
+            None
+        }
+    }
+}
+
+#[test]
+fn check_find_not_present() {
+    let counter = AtomicUsize::new(0);
+    let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
+        counter.fetch_add(1, Ordering::SeqCst);
+        p >= 2048
+    });
+    assert!(value.is_none());
+    assert!(counter.load(Ordering::SeqCst) == 2048); // should have visited every single one
+}
+
+#[test]
+fn check_find_is_present() {
+    let counter = AtomicUsize::new(0);
+    let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
+        counter.fetch_add(1, Ordering::SeqCst);
+        p >= 1024 && p < 1096
+    });
+    let q = value.unwrap();
+    assert!(q >= 1024 && q < 1096);
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn check_while_some() {
+    let value = (0_i32..2048).into_par_iter().map(Some).while_some().max();
+    assert_eq!(value, Some(2047));
+
+    let counter = AtomicUsize::new(0);
+    let value = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| {
+            counter.fetch_add(1, Ordering::SeqCst);
+            if x < 1024 {
+                Some(x)
+            } else {
+                None
+            }
+        })
+        .while_some()
+        .max();
+    assert!(value < Some(1024));
+    assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
+}
+
+#[test]
+fn par_iter_collect_option() {
+    let a: Option<Vec<_>> = (0_i32..2048).map(Some).collect();
+    let b: Option<Vec<_>> = (0_i32..2048).into_par_iter().map(Some).collect();
+    assert_eq!(a, b);
+
+    let c: Option<Vec<_>> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x == 1234 { None } else { Some(x) })
+        .collect();
+    assert_eq!(c, None);
+}
+
+#[test]
+fn par_iter_collect_result() {
+    let a: Result<Vec<_>, ()> = (0_i32..2048).map(Ok).collect();
+    let b: Result<Vec<_>, ()> = (0_i32..2048).into_par_iter().map(Ok).collect();
+    assert_eq!(a, b);
+
+    let c: Result<Vec<_>, _> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x == 1234 { Err(x) } else { Ok(x) })
+        .collect();
+    assert_eq!(c, Err(1234));
+
+    let d: Result<Vec<_>, _> = (0_i32..2048)
+        .into_par_iter()
+        .map(|x| if x % 100 == 99 { Err(x) } else { Ok(x) })
+        .collect();
+    assert_eq!(d.map_err(|x| x % 100), Err(99));
+}
+
+#[test]
+fn par_iter_collect() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: Vec<i32> = a.par_iter().map(|&i| i + 1).collect();
+    let c: Vec<i32> = (0..1024).map(|i| i + 1).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_vecdeque() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: VecDeque<i32> = a.par_iter().cloned().collect();
+    let c: VecDeque<i32> = a.iter().cloned().collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_binaryheap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let mut b: BinaryHeap<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.peek(), Some(&1023));
+    assert_eq!(b.len(), 1024);
+    for n in (0..1024).rev() {
+        assert_eq!(b.pop(), Some(n));
+        assert_eq!(b.len() as i32, n);
+    }
+}
+
+#[test]
+fn par_iter_collect_hashmap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: HashMap<i32, String> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(&b[&3], "3");
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_hashset() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: HashSet<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_btreemap() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: BTreeMap<i32, String> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(&b[&3], "3");
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_btreeset() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: BTreeSet<i32> = a.par_iter().cloned().collect();
+    assert_eq!(b.len(), 1024);
+}
+
+#[test]
+fn par_iter_collect_linked_list() {
+    let a: Vec<i32> = (0..1024).collect();
+    let b: LinkedList<_> = a.par_iter().map(|&i| (i, format!("{}", i))).collect();
+    let c: LinkedList<_> = a.iter().map(|&i| (i, format!("{}", i))).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_linked_list_flat_map_filter() {
+    let b: LinkedList<i32> = (0_i32..1024)
+        .into_par_iter()
+        .flat_map(|i| (0..i))
+        .filter(|&i| i % 2 == 0)
+        .collect();
+    let c: LinkedList<i32> = (0_i32..1024)
+        .flat_map(|i| (0..i))
+        .filter(|&i| i % 2 == 0)
+        .collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn par_iter_collect_cows() {
+    use std::borrow::Cow;
+
+    let s = "Fearless Concurrency with Rust";
+
+    // Collects `i32` into a `Vec`
+    let a: Cow<'_, [i32]> = (0..1024).collect();
+    let b: Cow<'_, [i32]> = a.par_iter().cloned().collect();
+    assert_eq!(a, b);
+
+    // Collects `char` into a `String`
+    let a: Cow<'_, str> = s.chars().collect();
+    let b: Cow<'_, str> = s.par_chars().collect();
+    assert_eq!(a, b);
+
+    // Collects `str` into a `String`
+    let a: Cow<'_, str> = s.split_whitespace().collect();
+    let b: Cow<'_, str> = s.par_split_whitespace().collect();
+    assert_eq!(a, b);
+
+    // Collects `String` into a `String`
+    let a: Cow<'_, str> = s.split_whitespace().map(str::to_owned).collect();
+    let b: Cow<'_, str> = s.par_split_whitespace().map(str::to_owned).collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn par_iter_unindexed_flat_map() {
+    let b: Vec<i64> = (0_i64..1024).into_par_iter().flat_map(Some).collect();
+    let c: Vec<i64> = (0_i64..1024).flat_map(Some).collect();
+    assert_eq!(b, c);
+}
+
+#[test]
+fn min_max() {
+    let rng = seeded_rng();
+    let a: Vec<i32> = rng.sample_iter(&Standard).take(1024).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(slice.par_iter().min(), slice.iter().min());
+        assert_eq!(slice.par_iter().max(), slice.iter().max());
+    }
+}
+
+#[test]
+fn min_max_by() {
+    let rng = seeded_rng();
+    // Make sure there are duplicate keys, for testing sort stability
+    let r: Vec<i32> = rng.sample_iter(&Standard).take(512).collect();
+    let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(
+            slice.par_iter().min_by(|x, y| x.0.cmp(&y.0)),
+            slice.iter().min_by(|x, y| x.0.cmp(&y.0))
+        );
+        assert_eq!(
+            slice.par_iter().max_by(|x, y| x.0.cmp(&y.0)),
+            slice.iter().max_by(|x, y| x.0.cmp(&y.0))
+        );
+    }
+}
+
+#[test]
+fn min_max_by_key() {
+    let rng = seeded_rng();
+    // Make sure there are duplicate keys, for testing sort stability
+    let r: Vec<i32> = rng.sample_iter(&Standard).take(512).collect();
+    let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect();
+    for i in 0..=a.len() {
+        let slice = &a[..i];
+        assert_eq!(
+            slice.par_iter().min_by_key(|x| x.0),
+            slice.iter().min_by_key(|x| x.0)
+        );
+        assert_eq!(
+            slice.par_iter().max_by_key(|x| x.0),
+            slice.iter().max_by_key(|x| x.0)
+        );
+    }
+}
+
+#[test]
+fn check_rev() {
+    let a: Vec<usize> = (0..1024).rev().collect();
+    let b: Vec<usize> = (0..1024).collect();
+
+    assert!(a.par_iter().rev().zip(b).all(|(&a, b)| a == b));
+}
+
+#[test]
+fn scope_mix() {
+    let counter_p = &AtomicUsize::new(0);
+    scope(|s| {
+        s.spawn(move |s| {
+            divide_and_conquer(s, counter_p, 1024);
+        });
+        s.spawn(move |_| {
+            let a: Vec<i32> = (0..1024).collect();
+            let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j);
+            let r2 = a.iter().map(|&i| i + 1).sum();
+            assert_eq!(r1.unwrap(), r2);
+        });
+    });
+}
+
+fn divide_and_conquer<'scope>(scope: &Scope<'scope>, counter: &'scope AtomicUsize, size: usize) {
+    if size > 1 {
+        scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2));
+        scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2));
+    } else {
+        // count the leaves
+        counter.fetch_add(1, Ordering::SeqCst);
+    }
+}
+
+#[test]
+fn check_split() {
+    use std::ops::Range;
+
+    let a = (0..1024).into_par_iter();
+
+    let b = split(0..1024, |Range { start, end }| {
+        let mid = (end - start) / 2;
+        if mid > start {
+            (start..mid, Some(mid..end))
+        } else {
+            (start..end, None)
+        }
+    })
+    .flat_map(|range| range);
+
+    assert_eq!(a.collect::<Vec<_>>(), b.collect::<Vec<_>>());
+}
+
+#[test]
+fn check_lengths() {
+    fn check(min: usize, max: usize) {
+        let range = 0..1024 * 1024;
+
+        // Check against normalized values.
+        let min_check = cmp::min(cmp::max(min, 1), range.len());
+        let max_check = cmp::max(max, min_check.saturating_add(min_check - 1));
+
+        assert!(
+            range
+                .into_par_iter()
+                .with_min_len(min)
+                .with_max_len(max)
+                .fold(|| 0, |count, _| count + 1)
+                .all(|c| c >= min_check && c <= max_check),
+            "check_lengths failed {:?} -> {:?} ",
+            (min, max),
+            (min_check, max_check)
+        );
+    }
+
+    let lengths = [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, usize::MAX];
+    for &min in &lengths {
+        for &max in &lengths {
+            check(min, max);
+        }
+    }
+}
+
+#[test]
+fn check_map_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .map_with(sender, |s, i| s.send(i).unwrap())
+        .count();
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_fold_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .fold_with(sender, |s, i| {
+            s.send(i).unwrap();
+            s
+        })
+        .count();
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_for_each_with() {
+    let (sender, receiver) = mpsc::channel();
+    let a: HashSet<_> = (0..1024).collect();
+
+    a.par_iter()
+        .cloned()
+        .for_each_with(sender, |s, i| s.send(i).unwrap());
+
+    let b: HashSet<_> = receiver.iter().collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn check_extend_items() {
+    fn check<C>()
+    where
+        C: Default
+            + Eq
+            + Debug
+            + Extend<i32>
+            + for<'a> Extend<&'a i32>
+            + ParallelExtend<i32>
+            + for<'a> ParallelExtend<&'a i32>,
+    {
+        let mut serial = C::default();
+        let mut parallel = C::default();
+
+        // extend with references
+        let v: Vec<_> = (0..128).collect();
+        serial.extend(&v);
+        parallel.par_extend(&v);
+        assert_eq!(serial, parallel);
+
+        // extend with values
+        serial.extend(-128..0);
+        parallel.par_extend(-128..0);
+        assert_eq!(serial, parallel);
+    }
+
+    check::<BTreeSet<_>>();
+    check::<HashSet<_>>();
+    check::<LinkedList<_>>();
+    check::<Vec<_>>();
+    check::<VecDeque<_>>();
+}
+
+#[test]
+fn check_extend_heap() {
+    let mut serial: BinaryHeap<_> = Default::default();
+    let mut parallel: BinaryHeap<_> = Default::default();
+
+    // extend with references
+    let v: Vec<_> = (0..128).collect();
+    serial.extend(&v);
+    parallel.par_extend(&v);
+    assert_eq!(
+        serial.clone().into_sorted_vec(),
+        parallel.clone().into_sorted_vec()
+    );
+
+    // extend with values
+    serial.extend(-128..0);
+    parallel.par_extend(-128..0);
+    assert_eq!(serial.into_sorted_vec(), parallel.into_sorted_vec());
+}
+
+#[test]
+fn check_extend_pairs() {
+    fn check<C>()
+    where
+        C: Default
+            + Eq
+            + Debug
+            + Extend<(usize, i32)>
+            + for<'a> Extend<(&'a usize, &'a i32)>
+            + ParallelExtend<(usize, i32)>
+            + for<'a> ParallelExtend<(&'a usize, &'a i32)>,
+    {
+        let mut serial = C::default();
+        let mut parallel = C::default();
+
+        // extend with references
+        let m: HashMap<_, _> = (0..128).enumerate().collect();
+        serial.extend(&m);
+        parallel.par_extend(&m);
+        assert_eq!(serial, parallel);
+
+        // extend with values
+        let v: Vec<(_, _)> = (-128..0).enumerate().collect();
+        serial.extend(v.clone());
+        parallel.par_extend(v);
+        assert_eq!(serial, parallel);
+    }
+
+    check::<BTreeMap<usize, i32>>();
+    check::<HashMap<usize, i32>>();
+}
+
+#[test]
+fn check_unzip_into_vecs() {
+    let mut a = vec![];
+    let mut b = vec![];
+    (0..1024)
+        .into_par_iter()
+        .map(|i| i * i)
+        .enumerate()
+        .unzip_into_vecs(&mut a, &mut b);
+
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_unzip() {
+    // indexed, unindexed
+    let (a, b): (Vec<_>, HashSet<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (Vec<_>, HashSet<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // unindexed, indexed
+    let (a, b): (HashSet<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (HashSet<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // indexed, indexed
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip();
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+
+    // unindexed producer
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024)
+        .into_par_iter()
+        .filter_map(|i| Some((i, i * i)))
+        .unzip();
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| (i, i * i)).unzip();
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_partition() {
+    let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().partition(|&i| i % 3 == 0);
+    let (c, d): (Vec<_>, Vec<_>) = (0..1024).partition(|&i| i % 3 == 0);
+    assert_eq!(a, c);
+    assert_eq!(b, d);
+}
+
+#[test]
+fn check_partition_map() {
+    let input = "a b c 1 2 3 x y z";
+    let (a, b): (Vec<_>, String) =
+        input
+            .par_split_whitespace()
+            .partition_map(|s| match s.parse::<i32>() {
+                Ok(n) => Either::Left(n),
+                Err(_) => Either::Right(s),
+            });
+    assert_eq!(a, vec![1, 2, 3]);
+    assert_eq!(b, "abcxyz");
+}
+
+#[test]
+fn check_either() {
+    type I = crate::vec::IntoIter<i32>;
+    type E = Either<I, I>;
+
+    let v: Vec<i32> = (0..1024).collect();
+
+    // try iterating the left side
+    let left: E = Either::Left(v.clone().into_par_iter());
+    assert!(left.eq(v.clone()));
+
+    // try iterating the right side
+    let right: E = Either::Right(v.clone().into_par_iter());
+    assert!(right.eq(v.clone()));
+
+    // try an indexed iterator
+    let left: E = Either::Left(v.clone().into_par_iter());
+    assert!(left.enumerate().eq(v.clone().into_par_iter().enumerate()));
+}
+
+#[test]
+fn check_either_extend() {
+    type E = Either<Vec<i32>, HashSet<i32>>;
+
+    let v: Vec<i32> = (0..1024).collect();
+
+    // try extending the left side
+    let mut left: E = Either::Left(vec![]);
+    left.par_extend(v.clone());
+    assert_eq!(left.as_ref(), Either::Left(&v));
+
+    // try extending the right side
+    let mut right: E = Either::Right(HashSet::default());
+    right.par_extend(v.clone());
+    assert_eq!(right, Either::Right(v.iter().cloned().collect()));
+}
+
+#[test]
+fn check_interleave_eq() {
+    let xs: Vec<usize> = (0..10).collect();
+    let ys: Vec<usize> = (10..20).collect();
+
+    let mut actual = vec![];
+    xs.par_iter()
+        .interleave(&ys)
+        .map(|&i| i)
+        .collect_into_vec(&mut actual);
+
+    let expected: Vec<usize> = (0..10)
+        .zip(10..20)
+        .flat_map(|(i, j)| vec![i, j].into_iter())
+        .collect();
+    assert_eq!(expected, actual);
+}
+
+#[test]
+fn check_interleave_uneven() {
+    let cases: Vec<(Vec<usize>, Vec<usize>, Vec<usize>)> = vec![
+        (
+            (0..9).collect(),
+            vec![10],
+            vec![0, 10, 1, 2, 3, 4, 5, 6, 7, 8],
+        ),
+        (
+            vec![10],
+            (0..9).collect(),
+            vec![10, 0, 1, 2, 3, 4, 5, 6, 7, 8],
+        ),
+        (
+            (0..5).collect(),
+            (5..10).collect(),
+            (0..5)
+                .zip(5..10)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+        (vec![], (0..9).collect(), (0..9).collect()),
+        ((0..9).collect(), vec![], (0..9).collect()),
+        (
+            (0..50).collect(),
+            (50..100).collect(),
+            (0..50)
+                .zip(50..100)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+    ];
+
+    for (i, (xs, ys, expected)) in cases.into_iter().enumerate() {
+        let mut res = vec![];
+        xs.par_iter()
+            .interleave(&ys)
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        xs.par_iter()
+            .interleave(&ys)
+            .rev()
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<usize>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+fn check_interleave_shortest() {
+    let cases: Vec<(Vec<usize>, Vec<usize>, Vec<usize>)> = vec![
+        ((0..9).collect(), vec![10], vec![0, 10, 1]),
+        (vec![10], (0..9).collect(), vec![10, 0]),
+        (
+            (0..5).collect(),
+            (5..10).collect(),
+            (0..5)
+                .zip(5..10)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+        (vec![], (0..9).collect(), vec![]),
+        ((0..9).collect(), vec![], vec![0]),
+        (
+            (0..50).collect(),
+            (50..100).collect(),
+            (0..50)
+                .zip(50..100)
+                .flat_map(|(i, j)| vec![i, j].into_iter())
+                .collect(),
+        ),
+    ];
+
+    for (i, (xs, ys, expected)) in cases.into_iter().enumerate() {
+        let mut res = vec![];
+        xs.par_iter()
+            .interleave_shortest(&ys)
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        xs.par_iter()
+            .interleave_shortest(&ys)
+            .rev()
+            .map(|&i| i)
+            .collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<usize>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+#[should_panic(expected = "chunk_size must not be zero")]
+fn check_chunks_zero_size() {
+    let _: Vec<Vec<i32>> = vec![1, 2, 3].into_par_iter().chunks(0).collect();
+}
+
+#[test]
+fn check_chunks_even_size() {
+    assert_eq!(
+        vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]],
+        (1..10).into_par_iter().chunks(3).collect::<Vec<Vec<i32>>>()
+    );
+}
+
+#[test]
+fn check_chunks_empty() {
+    let v: Vec<i32> = vec![];
+    let expected: Vec<Vec<i32>> = vec![];
+    assert_eq!(
+        expected,
+        v.into_par_iter().chunks(2).collect::<Vec<Vec<i32>>>()
+    );
+}
+
+#[test]
+fn check_chunks_len() {
+    assert_eq!(4, (0..8).into_par_iter().chunks(2).len());
+    assert_eq!(3, (0..9).into_par_iter().chunks(3).len());
+    assert_eq!(3, (0..8).into_par_iter().chunks(3).len());
+    assert_eq!(1, (&[1]).par_iter().chunks(3).len());
+    assert_eq!(0, (0..0).into_par_iter().chunks(3).len());
+}
+
+#[test]
+fn check_chunks_uneven() {
+    let cases: Vec<(Vec<u32>, usize, Vec<Vec<u32>>)> = vec![
+        ((0..5).collect(), 3, vec![vec![0, 1, 2], vec![3, 4]]),
+        (vec![1], 5, vec![vec![1]]),
+        ((0..4).collect(), 3, vec![vec![0, 1, 2], vec![3]]),
+    ];
+
+    for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+        let mut res: Vec<Vec<u32>> = vec![];
+        v.par_iter()
+            .chunks(n)
+            .map(|v| v.into_iter().cloned().collect())
+            .collect_into_vec(&mut res);
+        assert_eq!(expected, res, "Case {} failed", i);
+
+        res.truncate(0);
+        v.into_par_iter().chunks(n).rev().collect_into_vec(&mut res);
+        assert_eq!(
+            expected.into_iter().rev().collect::<Vec<Vec<u32>>>(),
+            res,
+            "Case {} reversed failed",
+            i
+        );
+    }
+}
+
+#[test]
+#[ignore] // it's quick enough on optimized 32-bit platforms, but otherwise... ... ...
+#[should_panic(expected = "overflow")]
+#[cfg(debug_assertions)]
+fn check_repeat_unbounded() {
+    // use just one thread, so we don't get infinite adaptive splitting
+    // (forever stealing and re-splitting jobs that will panic on overflow)
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+    pool.install(|| {
+        println!("counted {} repeats", repeat(()).count());
+    });
+}
+
+#[test]
+fn check_repeat_find_any() {
+    let even = repeat(4).find_any(|&x| x % 2 == 0);
+    assert_eq!(even, Some(4));
+}
+
+#[test]
+fn check_repeat_take() {
+    let v: Vec<_> = repeat(4).take(4).collect();
+    assert_eq!(v, [4, 4, 4, 4]);
+}
+
+#[test]
+fn check_repeat_zip() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = repeat(4).zip(v).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_repeatn_zip_left() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = repeatn(4, usize::MAX).zip(v).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_repeatn_zip_right() {
+    let v = vec![4, 4, 4, 4];
+    let mut fours: Vec<_> = v.into_par_iter().zip(repeatn(4, usize::MAX)).collect();
+    assert_eq!(fours.len(), 4);
+    while let Some(item) = fours.pop() {
+        assert_eq!(item, (4, 4));
+    }
+}
+
+#[test]
+fn check_empty() {
+    // drive_unindexed
+    let mut v: Vec<i32> = empty().filter(|_| unreachable!()).collect();
+    assert!(v.is_empty());
+
+    // drive (indexed)
+    empty().collect_into_vec(&mut v);
+    assert!(v.is_empty());
+
+    // with_producer
+    let v: Vec<(i32, i32)> = empty().zip(1..10).collect();
+    assert!(v.is_empty());
+}
+
+#[test]
+fn check_once() {
+    // drive_unindexed
+    let mut v: Vec<i32> = once(42).filter(|_| true).collect();
+    assert_eq!(v, &[42]);
+
+    // drive (indexed)
+    once(42).collect_into_vec(&mut v);
+    assert_eq!(v, &[42]);
+
+    // with_producer
+    let v: Vec<(i32, i32)> = once(42).zip(1..10).collect();
+    assert_eq!(v, &[(42, 1)]);
+}
+
+#[test]
+fn check_update() {
+    let mut v: Vec<Vec<_>> = vec![vec![1], vec![3, 2, 1]];
+    v.par_iter_mut().update(|v| v.push(0)).for_each(|_| ());
+
+    assert_eq!(v, vec![vec![1, 0], vec![3, 2, 1, 0]]);
+}
diff --git a/src/iter/try_fold.rs b/src/iter/try_fold.rs
new file mode 100644
index 0000000..c1a57a4
--- /dev/null
+++ b/src/iter/try_fold.rs
@@ -0,0 +1,294 @@
+use super::plumbing::*;
+use super::*;
+
+use super::private::Try;
+use std::fmt::{self, Debug};
+use std::marker::PhantomData;
+
+impl<U, I, ID, F> TryFold<I, U, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Ok + Sync + Send,
+    U: Try + Send,
+{
+    pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
+        TryFold {
+            base,
+            identity,
+            fold_op,
+            marker: PhantomData,
+        }
+    }
+}
+
+/// `TryFold` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`try_fold()`] method on [`ParallelIterator`]
+///
+/// [`try_fold()`]: trait.ParallelIterator.html#method.try_fold
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct TryFold<I, U, ID, F> {
+    base: I,
+    identity: ID,
+    fold_op: F,
+    marker: PhantomData<U>,
+}
+
+impl<U, I: ParallelIterator + Debug, ID, F> Debug for TryFold<I, U, ID, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TryFold").field("base", &self.base).finish()
+    }
+}
+
+impl<U, I, ID, F> ParallelIterator for TryFold<I, U, ID, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    ID: Fn() -> U::Ok + Sync + Send,
+    U: Try + Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = TryFoldConsumer {
+            base: consumer,
+            identity: &self.identity,
+            fold_op: &self.fold_op,
+            marker: PhantomData,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct TryFoldConsumer<'c, U, C, ID, F> {
+    base: C,
+    identity: &'c ID,
+    fold_op: &'c F,
+    marker: PhantomData<U>,
+}
+
+impl<'r, U, T, C, ID, F> Consumer<T> for TryFoldConsumer<'r, U, C, ID, F>
+where
+    C: Consumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    ID: Fn() -> U::Ok + Sync,
+    U: Try + Send,
+{
+    type Folder = TryFoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            TryFoldConsumer { base: left, ..self },
+            TryFoldConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryFoldFolder {
+            base: self.base.into_folder(),
+            result: Ok((self.identity)()),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for TryFoldConsumer<'r, U, C, ID, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    ID: Fn() -> U::Ok + Sync,
+    U: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        TryFoldConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct TryFoldFolder<'r, C, U: Try, F> {
+    base: C,
+    fold_op: &'r F,
+    result: Result<U::Ok, U::Error>,
+}
+
+impl<'r, C, U, F, T> Folder<T> for TryFoldFolder<'r, C, U, F>
+where
+    C: Folder<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: T) -> Self {
+        let fold_op = self.fold_op;
+        if let Ok(acc) = self.result {
+            self.result = fold_op(acc, item).into_result();
+        }
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        let item = match self.result {
+            Ok(ok) => U::from_ok(ok),
+            Err(error) => U::from_error(error),
+        };
+        self.base.consume(item).complete()
+    }
+
+    fn full(&self) -> bool {
+        self.result.is_err() || self.base.full()
+    }
+}
+
+// ///////////////////////////////////////////////////////////////////////////
+
+impl<U, I, F> TryFoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    pub(super) fn new(base: I, item: U::Ok, fold_op: F) -> Self {
+        TryFoldWith {
+            base,
+            item,
+            fold_op,
+        }
+    }
+}
+
+/// `TryFoldWith` is an iterator that applies a function over an iterator producing a single value.
+/// This struct is created by the [`try_fold_with()`] method on [`ParallelIterator`]
+///
+/// [`try_fold_with()`]: trait.ParallelIterator.html#method.try_fold_with
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct TryFoldWith<I, U: Try, F> {
+    base: I,
+    item: U::Ok,
+    fold_op: F,
+}
+
+impl<I: ParallelIterator + Debug, U: Try, F> Debug for TryFoldWith<I, U, F>
+where
+    U::Ok: Debug,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TryFoldWith")
+            .field("base", &self.base)
+            .field("item", &self.item)
+            .finish()
+    }
+}
+
+impl<U, I, F> ParallelIterator for TryFoldWith<I, U, F>
+where
+    I: ParallelIterator,
+    F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    type Item = U;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = TryFoldWithConsumer {
+            base: consumer,
+            item: self.item,
+            fold_op: &self.fold_op,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+struct TryFoldWithConsumer<'c, C, U: Try, F> {
+    base: C,
+    item: U::Ok,
+    fold_op: &'c F,
+}
+
+impl<'r, U, T, C, F> Consumer<T> for TryFoldWithConsumer<'r, C, U, F>
+where
+    C: Consumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    type Folder = TryFoldFolder<'r, C::Folder, U, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            TryFoldWithConsumer {
+                base: left,
+                item: self.item.clone(),
+                ..self
+            },
+            TryFoldWithConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryFoldFolder {
+            base: self.base.into_folder(),
+            result: Ok(self.item),
+            fold_op: self.fold_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'r, U, T, C, F> UnindexedConsumer<T> for TryFoldWithConsumer<'r, C, U, F>
+where
+    C: UnindexedConsumer<U>,
+    F: Fn(U::Ok, T) -> U + Sync,
+    U: Try + Send,
+    U::Ok: Clone + Send,
+{
+    fn split_off_left(&self) -> Self {
+        TryFoldWithConsumer {
+            base: self.base.split_off_left(),
+            item: self.item.clone(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
diff --git a/src/iter/try_reduce.rs b/src/iter/try_reduce.rs
new file mode 100644
index 0000000..76b3850
--- /dev/null
+++ b/src/iter/try_reduce.rs
@@ -0,0 +1,129 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use super::private::Try;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn try_reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    let full = AtomicBool::new(false);
+    let consumer = TryReduceConsumer {
+        identity: &identity,
+        reduce_op: &reduce_op,
+        full: &full,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct TryReduceConsumer<'r, R, ID> {
+    identity: &'r ID,
+    reduce_op: &'r R,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, ID> Copy for TryReduceConsumer<'r, R, ID> {}
+
+impl<'r, R, ID> Clone for TryReduceConsumer<'r, R, ID> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Consumer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    type Folder = TryReduceFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = T;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryReduceFolder {
+            reduce_op: self.reduce_op,
+            result: Ok((self.identity)()),
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
+
+impl<'r, R, ID, T> UnindexedConsumer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    ID: Fn() -> T::Ok + Sync,
+    T: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, ID, T> Reducer<T> for TryReduceConsumer<'r, R, ID>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try,
+{
+    fn reduce(self, left: T, right: T) -> T {
+        match (left.into_result(), right.into_result()) {
+            (Ok(left), Ok(right)) => (self.reduce_op)(left, right),
+            (Err(e), _) | (_, Err(e)) => T::from_error(e),
+        }
+    }
+}
+
+struct TryReduceFolder<'r, R, T: Try> {
+    reduce_op: &'r R,
+    result: Result<T::Ok, T::Error>,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, T> Folder<T> for TryReduceFolder<'r, R, T>
+where
+    R: Fn(T::Ok, T::Ok) -> T,
+    T: Try,
+{
+    type Result = T;
+
+    fn consume(mut self, item: T) -> Self {
+        let reduce_op = self.reduce_op;
+        if let Ok(left) = self.result {
+            self.result = match item.into_result() {
+                Ok(right) => reduce_op(left, right).into_result(),
+                Err(error) => Err(error),
+            };
+        }
+        if self.result.is_err() {
+            self.full.store(true, Ordering::Relaxed)
+        }
+        self
+    }
+
+    fn complete(self) -> T {
+        match self.result {
+            Ok(ok) => T::from_ok(ok),
+            Err(error) => T::from_error(error),
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
diff --git a/src/iter/try_reduce_with.rs b/src/iter/try_reduce_with.rs
new file mode 100644
index 0000000..6be3100
--- /dev/null
+++ b/src/iter/try_reduce_with.rs
@@ -0,0 +1,134 @@
+use super::plumbing::*;
+use super::ParallelIterator;
+
+use super::private::Try;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+pub(super) fn try_reduce_with<PI, R, T>(pi: PI, reduce_op: R) -> Option<T>
+where
+    PI: ParallelIterator<Item = T>,
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    let full = AtomicBool::new(false);
+    let consumer = TryReduceWithConsumer {
+        reduce_op: &reduce_op,
+        full: &full,
+    };
+    pi.drive_unindexed(consumer)
+}
+
+struct TryReduceWithConsumer<'r, R> {
+    reduce_op: &'r R,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R> Copy for TryReduceWithConsumer<'r, R> {}
+
+impl<'r, R> Clone for TryReduceWithConsumer<'r, R> {
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+impl<'r, R, T> Consumer<T> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    type Folder = TryReduceWithFolder<'r, R, T>;
+    type Reducer = Self;
+    type Result = Option<T>;
+
+    fn split_at(self, _index: usize) -> (Self, Self, Self) {
+        (self, self, self)
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        TryReduceWithFolder {
+            reduce_op: self.reduce_op,
+            opt_result: None,
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
+
+impl<'r, R, T> UnindexedConsumer<T> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try + Send,
+{
+    fn split_off_left(&self) -> Self {
+        *self
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        *self
+    }
+}
+
+impl<'r, R, T> Reducer<Option<T>> for TryReduceWithConsumer<'r, R>
+where
+    R: Fn(T::Ok, T::Ok) -> T + Sync,
+    T: Try,
+{
+    fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
+        let reduce_op = self.reduce_op;
+        match (left, right) {
+            (None, x) | (x, None) => x,
+            (Some(a), Some(b)) => match (a.into_result(), b.into_result()) {
+                (Ok(a), Ok(b)) => Some(reduce_op(a, b)),
+                (Err(e), _) | (_, Err(e)) => Some(T::from_error(e)),
+            },
+        }
+    }
+}
+
+struct TryReduceWithFolder<'r, R, T: Try> {
+    reduce_op: &'r R,
+    opt_result: Option<Result<T::Ok, T::Error>>,
+    full: &'r AtomicBool,
+}
+
+impl<'r, R, T> Folder<T> for TryReduceWithFolder<'r, R, T>
+where
+    R: Fn(T::Ok, T::Ok) -> T,
+    T: Try,
+{
+    type Result = Option<T>;
+
+    fn consume(self, item: T) -> Self {
+        let reduce_op = self.reduce_op;
+        let result = match self.opt_result {
+            None => item.into_result(),
+            Some(Ok(a)) => match item.into_result() {
+                Ok(b) => reduce_op(a, b).into_result(),
+                Err(e) => Err(e),
+            },
+            Some(Err(e)) => Err(e),
+        };
+        if result.is_err() {
+            self.full.store(true, Ordering::Relaxed)
+        }
+        TryReduceWithFolder {
+            opt_result: Some(result),
+            ..self
+        }
+    }
+
+    fn complete(self) -> Option<T> {
+        let result = self.opt_result?;
+        Some(match result {
+            Ok(ok) => T::from_ok(ok),
+            Err(error) => T::from_error(error),
+        })
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed)
+    }
+}
diff --git a/src/iter/unzip.rs b/src/iter/unzip.rs
new file mode 100644
index 0000000..219b909
--- /dev/null
+++ b/src/iter/unzip.rs
@@ -0,0 +1,464 @@
+use super::plumbing::*;
+use super::*;
+
+/// This trait abstracts the different ways we can "unzip" one parallel
+/// iterator into two distinct consumers, which we can handle almost
+/// identically apart from how to process the individual items.
+trait UnzipOp<T>: Sync + Send {
+    /// The type of item expected by the left consumer.
+    type Left: Send;
+
+    /// The type of item expected by the right consumer.
+    type Right: Send;
+
+    /// Consumes one item and feeds it to one or both of the underlying folders.
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<Self::Left>,
+        FB: Folder<Self::Right>;
+
+    /// Reports whether this op may support indexed consumers.
+    /// - e.g. true for `unzip` where the item count passed through directly.
+    /// - e.g. false for `partition` where the sorting is not yet known.
+    fn indexable() -> bool {
+        false
+    }
+}
+
+/// Runs an unzip-like operation into default `ParallelExtend` collections.
+fn execute<I, OP, FromA, FromB>(pi: I, op: OP) -> (FromA, FromB)
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromA: Default + Send + ParallelExtend<OP::Left>,
+    FromB: Default + Send + ParallelExtend<OP::Right>,
+{
+    let mut a = FromA::default();
+    let mut b = FromB::default();
+    execute_into(&mut a, &mut b, pi, op);
+    (a, b)
+}
+
+/// Runs an unzip-like operation into `ParallelExtend` collections.
+fn execute_into<I, OP, FromA, FromB>(a: &mut FromA, b: &mut FromB, pi: I, op: OP)
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromA: Send + ParallelExtend<OP::Left>,
+    FromB: Send + ParallelExtend<OP::Right>,
+{
+    // We have no idea what the consumers will look like for these
+    // collections' `par_extend`, but we can intercept them in our own
+    // `drive_unindexed`.  Start with the left side, type `A`:
+    let iter = UnzipA { base: pi, op, b };
+    a.par_extend(iter);
+}
+
+/// Unzips the items of a parallel iterator into a pair of arbitrary
+/// `ParallelExtend` containers.
+///
+/// This is called by `ParallelIterator::unzip`.
+pub(super) fn unzip<I, A, B, FromA, FromB>(pi: I) -> (FromA, FromB)
+where
+    I: ParallelIterator<Item = (A, B)>,
+    FromA: Default + Send + ParallelExtend<A>,
+    FromB: Default + Send + ParallelExtend<B>,
+    A: Send,
+    B: Send,
+{
+    execute(pi, Unzip)
+}
+
+/// Unzips an `IndexedParallelIterator` into two arbitrary `Consumer`s.
+///
+/// This is called by `super::collect::unzip_into_vecs`.
+pub(super) fn unzip_indexed<I, A, B, CA, CB>(pi: I, left: CA, right: CB) -> (CA::Result, CB::Result)
+where
+    I: IndexedParallelIterator<Item = (A, B)>,
+    CA: Consumer<A>,
+    CB: Consumer<B>,
+    A: Send,
+    B: Send,
+{
+    let consumer = UnzipConsumer {
+        op: &Unzip,
+        left,
+        right,
+    };
+    pi.drive(consumer)
+}
+
+/// An `UnzipOp` that splits a tuple directly into the two consumers.
+struct Unzip;
+
+impl<A: Send, B: Send> UnzipOp<(A, B)> for Unzip {
+    type Left = A;
+    type Right = B;
+
+    fn consume<FA, FB>(&self, item: (A, B), left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<A>,
+        FB: Folder<B>,
+    {
+        (left.consume(item.0), right.consume(item.1))
+    }
+
+    fn indexable() -> bool {
+        true
+    }
+}
+
+/// Partitions the items of a parallel iterator into a pair of arbitrary
+/// `ParallelExtend` containers.
+///
+/// This is called by `ParallelIterator::partition`.
+pub(super) fn partition<I, A, B, P>(pi: I, predicate: P) -> (A, B)
+where
+    I: ParallelIterator,
+    A: Default + Send + ParallelExtend<I::Item>,
+    B: Default + Send + ParallelExtend<I::Item>,
+    P: Fn(&I::Item) -> bool + Sync + Send,
+{
+    execute(pi, Partition { predicate })
+}
+
+/// An `UnzipOp` that routes items depending on a predicate function.
+struct Partition<P> {
+    predicate: P,
+}
+
+impl<P, T> UnzipOp<T> for Partition<P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Send,
+{
+    type Left = T;
+    type Right = T;
+
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<T>,
+        FB: Folder<T>,
+    {
+        if (self.predicate)(&item) {
+            (left.consume(item), right)
+        } else {
+            (left, right.consume(item))
+        }
+    }
+}
+
+/// Partitions and maps the items of a parallel iterator into a pair of
+/// arbitrary `ParallelExtend` containers.
+///
+/// This called by `ParallelIterator::partition_map`.
+pub(super) fn partition_map<I, A, B, P, L, R>(pi: I, predicate: P) -> (A, B)
+where
+    I: ParallelIterator,
+    A: Default + Send + ParallelExtend<L>,
+    B: Default + Send + ParallelExtend<R>,
+    P: Fn(I::Item) -> Either<L, R> + Sync + Send,
+    L: Send,
+    R: Send,
+{
+    execute(pi, PartitionMap { predicate })
+}
+
+/// An `UnzipOp` that routes items depending on how they are mapped `Either`.
+struct PartitionMap<P> {
+    predicate: P,
+}
+
+impl<P, L, R, T> UnzipOp<T> for PartitionMap<P>
+where
+    P: Fn(T) -> Either<L, R> + Sync + Send,
+    L: Send,
+    R: Send,
+{
+    type Left = L;
+    type Right = R;
+
+    fn consume<FA, FB>(&self, item: T, left: FA, right: FB) -> (FA, FB)
+    where
+        FA: Folder<L>,
+        FB: Folder<R>,
+    {
+        match (self.predicate)(item) {
+            Either::Left(item) => (left.consume(item), right),
+            Either::Right(item) => (left, right.consume(item)),
+        }
+    }
+}
+
+/// A fake iterator to intercept the `Consumer` for type `A`.
+struct UnzipA<'b, I, OP, FromB> {
+    base: I,
+    op: OP,
+    b: &'b mut FromB,
+}
+
+impl<'b, I, OP, FromB> ParallelIterator for UnzipA<'b, I, OP, FromB>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    FromB: Send + ParallelExtend<OP::Right>,
+{
+    type Item = OP::Left;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let mut result = None;
+        {
+            // Now it's time to find the consumer for type `B`
+            let iter = UnzipB {
+                base: self.base,
+                op: self.op,
+                left_consumer: consumer,
+                left_result: &mut result,
+            };
+            self.b.par_extend(iter);
+        }
+        // NB: If for some reason `b.par_extend` doesn't actually drive the
+        // iterator, then we won't have a result for the left side to return
+        // at all.  We can't fake an arbitrary consumer's result, so panic.
+        result.expect("unzip consumers didn't execute!")
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        if OP::indexable() {
+            self.base.opt_len()
+        } else {
+            None
+        }
+    }
+}
+
+/// A fake iterator to intercept the `Consumer` for type `B`.
+struct UnzipB<'r, I, OP, CA>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    CA: UnindexedConsumer<OP::Left>,
+    CA::Result: 'r,
+{
+    base: I,
+    op: OP,
+    left_consumer: CA,
+    left_result: &'r mut Option<CA::Result>,
+}
+
+impl<'r, I, OP, CA> ParallelIterator for UnzipB<'r, I, OP, CA>
+where
+    I: ParallelIterator,
+    OP: UnzipOp<I::Item>,
+    CA: UnindexedConsumer<OP::Left>,
+{
+    type Item = OP::Right;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        // Now that we have two consumers, we can unzip the real iterator.
+        let consumer = UnzipConsumer {
+            op: &self.op,
+            left: self.left_consumer,
+            right: consumer,
+        };
+
+        let result = self.base.drive_unindexed(consumer);
+        *self.left_result = Some(result.0);
+        result.1
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        if OP::indexable() {
+            self.base.opt_len()
+        } else {
+            None
+        }
+    }
+}
+
+/// `Consumer` that unzips into two other `Consumer`s
+struct UnzipConsumer<'a, OP, CA, CB> {
+    op: &'a OP,
+    left: CA,
+    right: CB,
+}
+
+impl<'a, T, OP, CA, CB> Consumer<T> for UnzipConsumer<'a, OP, CA, CB>
+where
+    OP: UnzipOp<T>,
+    CA: Consumer<OP::Left>,
+    CB: Consumer<OP::Right>,
+{
+    type Folder = UnzipFolder<'a, OP, CA::Folder, CB::Folder>;
+    type Reducer = UnzipReducer<CA::Reducer, CB::Reducer>;
+    type Result = (CA::Result, CB::Result);
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left1, left2, left_reducer) = self.left.split_at(index);
+        let (right1, right2, right_reducer) = self.right.split_at(index);
+
+        (
+            UnzipConsumer {
+                op: self.op,
+                left: left1,
+                right: right1,
+            },
+            UnzipConsumer {
+                op: self.op,
+                left: left2,
+                right: right2,
+            },
+            UnzipReducer {
+                left: left_reducer,
+                right: right_reducer,
+            },
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        UnzipFolder {
+            op: self.op,
+            left: self.left.into_folder(),
+            right: self.right.into_folder(),
+        }
+    }
+
+    fn full(&self) -> bool {
+        // don't stop until everyone is full
+        self.left.full() && self.right.full()
+    }
+}
+
+impl<'a, T, OP, CA, CB> UnindexedConsumer<T> for UnzipConsumer<'a, OP, CA, CB>
+where
+    OP: UnzipOp<T>,
+    CA: UnindexedConsumer<OP::Left>,
+    CB: UnindexedConsumer<OP::Right>,
+{
+    fn split_off_left(&self) -> Self {
+        UnzipConsumer {
+            op: self.op,
+            left: self.left.split_off_left(),
+            right: self.right.split_off_left(),
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        UnzipReducer {
+            left: self.left.to_reducer(),
+            right: self.right.to_reducer(),
+        }
+    }
+}
+
+/// `Folder` that unzips into two other `Folder`s
+struct UnzipFolder<'a, OP, FA, FB> {
+    op: &'a OP,
+    left: FA,
+    right: FB,
+}
+
+impl<'a, T, OP, FA, FB> Folder<T> for UnzipFolder<'a, OP, FA, FB>
+where
+    OP: UnzipOp<T>,
+    FA: Folder<OP::Left>,
+    FB: Folder<OP::Right>,
+{
+    type Result = (FA::Result, FB::Result);
+
+    fn consume(self, item: T) -> Self {
+        let (left, right) = self.op.consume(item, self.left, self.right);
+        UnzipFolder {
+            op: self.op,
+            left,
+            right,
+        }
+    }
+
+    fn complete(self) -> Self::Result {
+        (self.left.complete(), self.right.complete())
+    }
+
+    fn full(&self) -> bool {
+        // don't stop until everyone is full
+        self.left.full() && self.right.full()
+    }
+}
+
+/// `Reducer` that unzips into two other `Reducer`s
+struct UnzipReducer<RA, RB> {
+    left: RA,
+    right: RB,
+}
+
+impl<A, B, RA, RB> Reducer<(A, B)> for UnzipReducer<RA, RB>
+where
+    RA: Reducer<A>,
+    RB: Reducer<B>,
+{
+    fn reduce(self, left: (A, B), right: (A, B)) -> (A, B) {
+        (
+            self.left.reduce(left.0, right.0),
+            self.right.reduce(left.1, right.1),
+        )
+    }
+}
+
+impl<A, B, FromA, FromB> ParallelExtend<(A, B)> for (FromA, FromB)
+where
+    A: Send,
+    B: Send,
+    FromA: Send + ParallelExtend<A>,
+    FromB: Send + ParallelExtend<B>,
+{
+    fn par_extend<I>(&mut self, pi: I)
+    where
+        I: IntoParallelIterator<Item = (A, B)>,
+    {
+        execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), Unzip);
+    }
+}
+
+impl<L, R, A, B> ParallelExtend<Either<L, R>> for (A, B)
+where
+    L: Send,
+    R: Send,
+    A: Send + ParallelExtend<L>,
+    B: Send + ParallelExtend<R>,
+{
+    fn par_extend<I>(&mut self, pi: I)
+    where
+        I: IntoParallelIterator<Item = Either<L, R>>,
+    {
+        execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), UnEither);
+    }
+}
+
+/// An `UnzipOp` that routes items depending on their `Either` variant.
+struct UnEither;
+
+impl<L, R> UnzipOp<Either<L, R>> for UnEither
+where
+    L: Send,
+    R: Send,
+{
+    type Left = L;
+    type Right = R;
+
+    fn consume<FL, FR>(&self, item: Either<L, R>, left: FL, right: FR) -> (FL, FR)
+    where
+        FL: Folder<L>,
+        FR: Folder<R>,
+    {
+        match item {
+            Either::Left(item) => (left.consume(item), right),
+            Either::Right(item) => (left, right.consume(item)),
+        }
+    }
+}
diff --git a/src/iter/update.rs b/src/iter/update.rs
new file mode 100644
index 0000000..373a4d7
--- /dev/null
+++ b/src/iter/update.rs
@@ -0,0 +1,327 @@
+use super::plumbing::*;
+use super::*;
+
+use std::fmt::{self, Debug};
+
+/// `Update` is an iterator that mutates the elements of an
+/// underlying iterator before they are yielded.
+///
+/// This struct is created by the [`update()`] method on [`ParallelIterator`]
+///
+/// [`update()`]: trait.ParallelIterator.html#method.update
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct Update<I: ParallelIterator, F> {
+    base: I,
+    update_op: F,
+}
+
+impl<I: ParallelIterator + Debug, F> Debug for Update<I, F> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Update").field("base", &self.base).finish()
+    }
+}
+
+impl<I, F> Update<I, F>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `Update` iterator.
+    pub(super) fn new(base: I, update_op: F) -> Self {
+        Update { base, update_op }
+    }
+}
+
+impl<I, F> ParallelIterator for Update<I, F>
+where
+    I: ParallelIterator,
+    F: Fn(&mut I::Item) + Send + Sync,
+{
+    type Item = I::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let consumer1 = UpdateConsumer::new(consumer, &self.update_op);
+        self.base.drive_unindexed(consumer1)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.base.opt_len()
+    }
+}
+
+impl<I, F> IndexedParallelIterator for Update<I, F>
+where
+    I: IndexedParallelIterator,
+    F: Fn(&mut I::Item) + Send + Sync,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let consumer1 = UpdateConsumer::new(consumer, &self.update_op);
+        self.base.drive(consumer1)
+    }
+
+    fn len(&self) -> usize {
+        self.base.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.base.with_producer(Callback {
+            callback,
+            update_op: self.update_op,
+        });
+
+        struct Callback<CB, F> {
+            callback: CB,
+            update_op: F,
+        }
+
+        impl<T, F, CB> ProducerCallback<T> for Callback<CB, F>
+        where
+            CB: ProducerCallback<T>,
+            F: Fn(&mut T) + Send + Sync,
+        {
+            type Output = CB::Output;
+
+            fn callback<P>(self, base: P) -> CB::Output
+            where
+                P: Producer<Item = T>,
+            {
+                let producer = UpdateProducer {
+                    base,
+                    update_op: &self.update_op,
+                };
+                self.callback.callback(producer)
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct UpdateProducer<'f, P, F> {
+    base: P,
+    update_op: &'f F,
+}
+
+impl<'f, P, F> Producer for UpdateProducer<'f, P, F>
+where
+    P: Producer,
+    F: Fn(&mut P::Item) + Send + Sync,
+{
+    type Item = P::Item;
+    type IntoIter = UpdateSeq<P::IntoIter, &'f F>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        UpdateSeq {
+            base: self.base.into_iter(),
+            update_op: self.update_op,
+        }
+    }
+
+    fn min_len(&self) -> usize {
+        self.base.min_len()
+    }
+    fn max_len(&self) -> usize {
+        self.base.max_len()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.base.split_at(index);
+        (
+            UpdateProducer {
+                base: left,
+                update_op: self.update_op,
+            },
+            UpdateProducer {
+                base: right,
+                update_op: self.update_op,
+            },
+        )
+    }
+
+    fn fold_with<G>(self, folder: G) -> G
+    where
+        G: Folder<Self::Item>,
+    {
+        let folder1 = UpdateFolder {
+            base: folder,
+            update_op: self.update_op,
+        };
+        self.base.fold_with(folder1).base
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct UpdateConsumer<'f, C, F> {
+    base: C,
+    update_op: &'f F,
+}
+
+impl<'f, C, F> UpdateConsumer<'f, C, F> {
+    fn new(base: C, update_op: &'f F) -> Self {
+        UpdateConsumer { base, update_op }
+    }
+}
+
+impl<'f, T, C, F> Consumer<T> for UpdateConsumer<'f, C, F>
+where
+    C: Consumer<T>,
+    F: Fn(&mut T) + Send + Sync,
+{
+    type Folder = UpdateFolder<'f, C::Folder, F>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            UpdateConsumer::new(left, self.update_op),
+            UpdateConsumer::new(right, self.update_op),
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        UpdateFolder {
+            base: self.base.into_folder(),
+            update_op: self.update_op,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+impl<'f, T, C, F> UnindexedConsumer<T> for UpdateConsumer<'f, C, F>
+where
+    C: UnindexedConsumer<T>,
+    F: Fn(&mut T) + Send + Sync,
+{
+    fn split_off_left(&self) -> Self {
+        UpdateConsumer::new(self.base.split_off_left(), &self.update_op)
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct UpdateFolder<'f, C, F> {
+    base: C,
+    update_op: &'f F,
+}
+
+fn apply<T>(update_op: impl Fn(&mut T)) -> impl Fn(T) -> T {
+    move |mut item| {
+        update_op(&mut item);
+        item
+    }
+}
+
+impl<'f, T, C, F> Folder<T> for UpdateFolder<'f, C, F>
+where
+    C: Folder<T>,
+    F: Fn(&mut T),
+{
+    type Result = C::Result;
+
+    fn consume(self, mut item: T) -> Self {
+        (self.update_op)(&mut item);
+
+        UpdateFolder {
+            base: self.base.consume(item),
+            update_op: self.update_op,
+        }
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = T>,
+    {
+        let update_op = self.update_op;
+        self.base = self
+            .base
+            .consume_iter(iter.into_iter().map(apply(update_op)));
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.base.full()
+    }
+}
+
+/// Standard Update adaptor, based on `itertools::adaptors::Update`
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+struct UpdateSeq<I, F> {
+    base: I,
+    update_op: F,
+}
+
+impl<I, F> Iterator for UpdateSeq<I, F>
+where
+    I: Iterator,
+    F: Fn(&mut I::Item),
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut v = self.base.next()?;
+        (self.update_op)(&mut v);
+        Some(v)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.base.size_hint()
+    }
+
+    fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
+    where
+        G: FnMut(Acc, Self::Item) -> Acc,
+    {
+        self.base.map(apply(self.update_op)).fold(init, g)
+    }
+
+    // if possible, re-use inner iterator specializations in collect
+    fn collect<C>(self) -> C
+    where
+        C: ::std::iter::FromIterator<Self::Item>,
+    {
+        self.base.map(apply(self.update_op)).collect()
+    }
+}
+
+impl<I, F> ExactSizeIterator for UpdateSeq<I, F>
+where
+    I: ExactSizeIterator,
+    F: Fn(&mut I::Item),
+{
+}
+
+impl<I, F> DoubleEndedIterator for UpdateSeq<I, F>
+where
+    I: DoubleEndedIterator,
+    F: Fn(&mut I::Item),
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let mut v = self.base.next_back()?;
+        (self.update_op)(&mut v);
+        Some(v)
+    }
+}
diff --git a/src/iter/while_some.rs b/src/iter/while_some.rs
new file mode 100644
index 0000000..215047b
--- /dev/null
+++ b/src/iter/while_some.rs
@@ -0,0 +1,154 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// `WhileSome` is an iterator that yields the `Some` elements of an iterator,
+/// halting as soon as any `None` is produced.
+///
+/// This struct is created by the [`while_some()`] method on [`ParallelIterator`]
+///
+/// [`while_some()`]: trait.ParallelIterator.html#method.while_some
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct WhileSome<I: ParallelIterator> {
+    base: I,
+}
+
+impl<I> WhileSome<I>
+where
+    I: ParallelIterator,
+{
+    /// Creates a new `WhileSome` iterator.
+    pub(super) fn new(base: I) -> Self {
+        WhileSome { base }
+    }
+}
+
+impl<I, T> ParallelIterator for WhileSome<I>
+where
+    I: ParallelIterator<Item = Option<T>>,
+    T: Send,
+{
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let full = AtomicBool::new(false);
+        let consumer1 = WhileSomeConsumer {
+            base: consumer,
+            full: &full,
+        };
+        self.base.drive_unindexed(consumer1)
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct WhileSomeConsumer<'f, C> {
+    base: C,
+    full: &'f AtomicBool,
+}
+
+impl<'f, T, C> Consumer<Option<T>> for WhileSomeConsumer<'f, C>
+where
+    C: Consumer<T>,
+    T: Send,
+{
+    type Folder = WhileSomeFolder<'f, C::Folder>;
+    type Reducer = C::Reducer;
+    type Result = C::Result;
+
+    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+        let (left, right, reducer) = self.base.split_at(index);
+        (
+            WhileSomeConsumer { base: left, ..self },
+            WhileSomeConsumer {
+                base: right,
+                ..self
+            },
+            reducer,
+        )
+    }
+
+    fn into_folder(self) -> Self::Folder {
+        WhileSomeFolder {
+            base: self.base.into_folder(),
+            full: self.full,
+        }
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed) || self.base.full()
+    }
+}
+
+impl<'f, T, C> UnindexedConsumer<Option<T>> for WhileSomeConsumer<'f, C>
+where
+    C: UnindexedConsumer<T>,
+    T: Send,
+{
+    fn split_off_left(&self) -> Self {
+        WhileSomeConsumer {
+            base: self.base.split_off_left(),
+            ..*self
+        }
+    }
+
+    fn to_reducer(&self) -> Self::Reducer {
+        self.base.to_reducer()
+    }
+}
+
+struct WhileSomeFolder<'f, C> {
+    base: C,
+    full: &'f AtomicBool,
+}
+
+impl<'f, T, C> Folder<Option<T>> for WhileSomeFolder<'f, C>
+where
+    C: Folder<T>,
+{
+    type Result = C::Result;
+
+    fn consume(mut self, item: Option<T>) -> Self {
+        match item {
+            Some(item) => self.base = self.base.consume(item),
+            None => self.full.store(true, Ordering::Relaxed),
+        }
+        self
+    }
+
+    fn consume_iter<I>(mut self, iter: I) -> Self
+    where
+        I: IntoIterator<Item = Option<T>>,
+    {
+        fn some<T>(full: &AtomicBool) -> impl Fn(&Option<T>) -> bool + '_ {
+            move |x| match *x {
+                Some(_) => !full.load(Ordering::Relaxed),
+                None => {
+                    full.store(true, Ordering::Relaxed);
+                    false
+                }
+            }
+        }
+
+        self.base = self.base.consume_iter(
+            iter.into_iter()
+                .take_while(some(self.full))
+                .map(Option::unwrap),
+        );
+        self
+    }
+
+    fn complete(self) -> C::Result {
+        self.base.complete()
+    }
+
+    fn full(&self) -> bool {
+        self.full.load(Ordering::Relaxed) || self.base.full()
+    }
+}
diff --git a/src/iter/zip.rs b/src/iter/zip.rs
new file mode 100644
index 0000000..33823db
--- /dev/null
+++ b/src/iter/zip.rs
@@ -0,0 +1,159 @@
+use super::plumbing::*;
+use super::*;
+use std::cmp;
+use std::iter;
+
+/// `Zip` is an iterator that zips up `a` and `b` into a single iterator
+/// of pairs. This struct is created by the [`zip()`] method on
+/// [`IndexedParallelIterator`]
+///
+/// [`zip()`]: trait.IndexedParallelIterator.html#method.zip
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct Zip<A: IndexedParallelIterator, B: IndexedParallelIterator> {
+    a: A,
+    b: B,
+}
+
+impl<A, B> Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    /// Creates a new `Zip` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        Zip { a, b }
+    }
+}
+
+impl<A, B> ParallelIterator for Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    type Item = (A::Item, B::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<A, B> IndexedParallelIterator for Zip<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        cmp::min(self.a.len(), self.b.len())
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        return self.a.with_producer(CallbackA {
+            callback,
+            b: self.b,
+        });
+
+        struct CallbackA<CB, B> {
+            callback: CB,
+            b: B,
+        }
+
+        impl<CB, ITEM, B> ProducerCallback<ITEM> for CallbackA<CB, B>
+        where
+            B: IndexedParallelIterator,
+            CB: ProducerCallback<(ITEM, B::Item)>,
+        {
+            type Output = CB::Output;
+
+            fn callback<A>(self, a_producer: A) -> Self::Output
+            where
+                A: Producer<Item = ITEM>,
+            {
+                self.b.with_producer(CallbackB {
+                    a_producer,
+                    callback: self.callback,
+                })
+            }
+        }
+
+        struct CallbackB<CB, A> {
+            a_producer: A,
+            callback: CB,
+        }
+
+        impl<CB, A, ITEM> ProducerCallback<ITEM> for CallbackB<CB, A>
+        where
+            A: Producer,
+            CB: ProducerCallback<(A::Item, ITEM)>,
+        {
+            type Output = CB::Output;
+
+            fn callback<B>(self, b_producer: B) -> Self::Output
+            where
+                B: Producer<Item = ITEM>,
+            {
+                self.callback.callback(ZipProducer {
+                    a: self.a_producer,
+                    b: b_producer,
+                })
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+struct ZipProducer<A: Producer, B: Producer> {
+    a: A,
+    b: B,
+}
+
+impl<A: Producer, B: Producer> Producer for ZipProducer<A, B> {
+    type Item = (A::Item, B::Item);
+    type IntoIter = iter::Zip<A::IntoIter, B::IntoIter>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.a.into_iter().zip(self.b.into_iter())
+    }
+
+    fn min_len(&self) -> usize {
+        cmp::max(self.a.min_len(), self.b.min_len())
+    }
+
+    fn max_len(&self) -> usize {
+        cmp::min(self.a.max_len(), self.b.max_len())
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (a_left, a_right) = self.a.split_at(index);
+        let (b_left, b_right) = self.b.split_at(index);
+        (
+            ZipProducer {
+                a: a_left,
+                b: b_left,
+            },
+            ZipProducer {
+                a: a_right,
+                b: b_right,
+            },
+        )
+    }
+}
diff --git a/src/iter/zip_eq.rs b/src/iter/zip_eq.rs
new file mode 100644
index 0000000..4e64397
--- /dev/null
+++ b/src/iter/zip_eq.rs
@@ -0,0 +1,72 @@
+use super::plumbing::*;
+use super::*;
+
+/// An [`IndexedParallelIterator`] that iterates over two parallel iterators of equal
+/// length simultaneously.
+///
+/// This struct is created by the [`zip_eq`] method on [`IndexedParallelIterator`],
+/// see its documentation for more information.
+///
+/// [`zip_eq`]: trait.IndexedParallelIterator.html#method.zip_eq
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Debug, Clone)]
+pub struct ZipEq<A: IndexedParallelIterator, B: IndexedParallelIterator> {
+    zip: Zip<A, B>,
+}
+
+impl<A, B> ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    /// Creates a new `ZipEq` iterator.
+    pub(super) fn new(a: A, b: B) -> Self {
+        ZipEq {
+            zip: super::Zip::new(a, b),
+        }
+    }
+}
+
+impl<A, B> ParallelIterator for ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    type Item = (A::Item, B::Item);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self.zip, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.zip.len())
+    }
+}
+
+impl<A, B> IndexedParallelIterator for ZipEq<A, B>
+where
+    A: IndexedParallelIterator,
+    B: IndexedParallelIterator,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self.zip, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.zip.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        self.zip.with_producer(callback)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..d5d0314
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,120 @@
+#![doc(html_root_url = "https://docs.rs/rayon/1.5")]
+#![deny(missing_debug_implementations)]
+#![deny(missing_docs)]
+#![deny(unreachable_pub)]
+#![warn(rust_2018_idioms)]
+
+//! Data-parallelism library that makes it easy to convert sequential
+//! computations into parallel
+//!
+//! Rayon is lightweight and convenient for introducing parallelism into existing
+//! code. It guarantees data-race free executions and takes advantage of
+//! parallelism when sensible, based on work-load at runtime.
+//!
+//! # How to use Rayon
+//!
+//! There are two ways to use Rayon:
+//!
+//! - **High-level parallel constructs** are the simplest way to use Rayon and also
+//!   typically the most efficient.
+//!   - [Parallel iterators][iter module] make it easy to convert a sequential iterator to
+//!     execute in parallel.
+//!     - The [`ParallelIterator`] trait defines general methods for all parallel iterators.
+//!     - The [`IndexedParallelIterator`] trait adds methods for iterators that support random
+//!       access.
+//!   - The [`par_sort`] method sorts `&mut [T]` slices (or vectors) in parallel.
+//!   - [`par_extend`] can be used to efficiently grow collections with items produced
+//!     by a parallel iterator.
+//! - **Custom tasks** let you divide your work into parallel tasks yourself.
+//!   - [`join`] is used to subdivide a task into two pieces.
+//!   - [`scope`] creates a scope within which you can create any number of parallel tasks.
+//!   - [`ThreadPoolBuilder`] can be used to create your own thread pools or customize
+//!     the global one.
+//!
+//! [iter module]: iter/index.html
+//! [`join`]: fn.join.html
+//! [`scope`]: fn.scope.html
+//! [`par_sort`]: slice/trait.ParallelSliceMut.html#method.par_sort
+//! [`par_extend`]: iter/trait.ParallelExtend.html#tymethod.par_extend
+//! [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html
+//!
+//! # Basic usage and the Rayon prelude
+//!
+//! First, you will need to add `rayon` to your `Cargo.toml`.
+//!
+//! Next, to use parallel iterators or the other high-level methods,
+//! you need to import several traits. Those traits are bundled into
+//! the module [`rayon::prelude`]. It is recommended that you import
+//! all of these traits at once by adding `use rayon::prelude::*` at
+//! the top of each module that uses Rayon methods.
+//!
+//! These traits give you access to the `par_iter` method which provides
+//! parallel implementations of many iterative functions such as [`map`],
+//! [`for_each`], [`filter`], [`fold`], and [more].
+//!
+//! [`rayon::prelude`]: prelude/index.html
+//! [`map`]: iter/trait.ParallelIterator.html#method.map
+//! [`for_each`]: iter/trait.ParallelIterator.html#method.for_each
+//! [`filter`]: iter/trait.ParallelIterator.html#method.filter
+//! [`fold`]: iter/trait.ParallelIterator.html#method.fold
+//! [more]: iter/trait.ParallelIterator.html#provided-methods
+//! [`ParallelIterator`]: iter/trait.ParallelIterator.html
+//! [`IndexedParallelIterator`]: iter/trait.IndexedParallelIterator.html
+//!
+//! # Crate Layout
+//!
+//! Rayon extends many of the types found in the standard library with
+//! parallel iterator implementations. The modules in the `rayon`
+//! crate mirror [`std`] itself: so, e.g., the `option` module in
+//! Rayon contains parallel iterators for the `Option` type, which is
+//! found in [the `option` module of `std`]. Similarly, the
+//! `collections` module in Rayon offers parallel iterator types for
+//! [the `collections` from `std`]. You will rarely need to access
+//! these submodules unless you need to name iterator types
+//! explicitly.
+//!
+//! [the `option` module of `std`]: https://doc.rust-lang.org/std/option/index.html
+//! [the `collections` from `std`]: https://doc.rust-lang.org/std/collections/index.html
+//! [`std`]: https://doc.rust-lang.org/std/
+//!
+//! # Other questions?
+//!
+//! See [the Rayon FAQ][faq].
+//!
+//! [faq]: https://github.com/rayon-rs/rayon/blob/master/FAQ.md
+
+#[macro_use]
+mod delegate;
+
+#[macro_use]
+mod private;
+
+mod split_producer;
+
+pub mod collections;
+pub mod iter;
+pub mod option;
+pub mod prelude;
+pub mod range;
+pub mod range_inclusive;
+pub mod result;
+pub mod slice;
+pub mod str;
+pub mod string;
+pub mod vec;
+
+mod math;
+mod par_either;
+
+mod compile_fail;
+
+pub use rayon_core::FnContext;
+pub use rayon_core::ThreadBuilder;
+pub use rayon_core::ThreadPool;
+pub use rayon_core::ThreadPoolBuildError;
+pub use rayon_core::ThreadPoolBuilder;
+pub use rayon_core::{current_num_threads, current_thread_index};
+pub use rayon_core::{join, join_context};
+pub use rayon_core::{scope, Scope};
+pub use rayon_core::{scope_fifo, ScopeFifo};
+pub use rayon_core::{spawn, spawn_fifo};
diff --git a/src/math.rs b/src/math.rs
new file mode 100644
index 0000000..9de5889
--- /dev/null
+++ b/src/math.rs
@@ -0,0 +1,54 @@
+use std::ops::{Bound, Range, RangeBounds};
+
+/// Divide `n` by `divisor`, and round up to the nearest integer
+/// if not evenly divisable.
+#[inline]
+pub(super) fn div_round_up(n: usize, divisor: usize) -> usize {
+    debug_assert!(divisor != 0, "Division by zero!");
+    if n == 0 {
+        0
+    } else {
+        (n - 1) / divisor + 1
+    }
+}
+
+/// Normalize arbitrary `RangeBounds` to a `Range`
+pub(super) fn simplify_range(range: impl RangeBounds<usize>, len: usize) -> Range<usize> {
+    let start = match range.start_bound() {
+        Bound::Unbounded => 0,
+        Bound::Included(&i) if i <= len => i,
+        Bound::Excluded(&i) if i < len => i + 1,
+        bound => panic!("range start {:?} should be <= length {}", bound, len),
+    };
+    let end = match range.end_bound() {
+        Bound::Unbounded => len,
+        Bound::Excluded(&i) if i <= len => i,
+        Bound::Included(&i) if i < len => i + 1,
+        bound => panic!("range end {:?} should be <= length {}", bound, len),
+    };
+    if start > end {
+        panic!(
+            "range start {:?} should be <= range end {:?}",
+            range.start_bound(),
+            range.end_bound()
+        );
+    }
+    start..end
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn check_div_round_up() {
+        assert_eq!(0, div_round_up(0, 5));
+        assert_eq!(1, div_round_up(5, 5));
+        assert_eq!(1, div_round_up(1, 5));
+        assert_eq!(2, div_round_up(3, 2));
+        assert_eq!(
+            usize::max_value() / 2 + 1,
+            div_round_up(usize::max_value(), 2)
+        );
+    }
+}
diff --git a/src/option.rs b/src/option.rs
new file mode 100644
index 0000000..0f56896
--- /dev/null
+++ b/src/option.rs
@@ -0,0 +1,203 @@
+//! Parallel iterator types for [options][std::option]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::option]: https://doc.rust-lang.org/stable/std/option/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// A parallel iterator over the value in [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`into_par_iter`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`into_par_iter`]: ../iter/trait.IntoParallelIterator.html#tymethod.into_par_iter
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    opt: Option<T>,
+}
+
+impl<T: Send> IntoParallelIterator for Option<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter { opt: self }
+    }
+}
+
+impl<T: Send> ParallelIterator for IntoIter<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.drive(consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for IntoIter<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        let mut folder = consumer.into_folder();
+        if let Some(item) = self.opt {
+            folder = folder.consume(item);
+        }
+        folder.complete()
+    }
+
+    fn len(&self) -> usize {
+        match self.opt {
+            Some(_) => 1,
+            None => 0,
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(OptionProducer { opt: self.opt })
+    }
+}
+
+/// A parallel iterator over a reference to the [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`par_iter`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`par_iter`]: ../iter/trait.IntoParallelRefIterator.html#tymethod.par_iter
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync> IntoParallelIterator for &'a Option<T> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter {
+            inner: self.as_ref().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// A parallel iterator over a mutable reference to the [`Some`] variant of an [`Option`].
+///
+/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none.
+///
+/// This `struct` is created by the [`par_iter_mut`] function.
+///
+/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
+/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some
+/// [`par_iter_mut`]: ../iter/trait.IntoParallelRefMutIterator.html#tymethod.par_iter_mut
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: IntoIter<&'a mut T>,
+}
+
+impl<'a, T: Send> IntoParallelIterator for &'a mut Option<T> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut {
+            inner: self.as_mut().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Private producer for an option
+struct OptionProducer<T: Send> {
+    opt: Option<T>,
+}
+
+impl<T: Send> Producer for OptionProducer<T> {
+    type Item = T;
+    type IntoIter = std::option::IntoIter<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.opt.into_iter()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        debug_assert!(index <= 1);
+        let none = OptionProducer { opt: None };
+        if index == 0 {
+            (none, self)
+        } else {
+            (self, none)
+        }
+    }
+}
+
+/// Collect an arbitrary `Option`-wrapped collection.
+///
+/// If any item is `None`, then all previous items collected are discarded,
+/// and it returns only `None`.
+impl<C, T> FromParallelIterator<Option<T>> for Option<C>
+where
+    C: FromParallelIterator<T>,
+    T: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Option<T>>,
+    {
+        fn check<T>(found_none: &AtomicBool) -> impl Fn(&Option<T>) + '_ {
+            move |item| {
+                if item.is_none() {
+                    found_none.store(true, Ordering::Relaxed);
+                }
+            }
+        }
+
+        let found_none = AtomicBool::new(false);
+        let collection = par_iter
+            .into_par_iter()
+            .inspect(check(&found_none))
+            .while_some()
+            .collect();
+
+        if found_none.load(Ordering::Relaxed) {
+            None
+        } else {
+            Some(collection)
+        }
+    }
+}
diff --git a/src/par_either.rs b/src/par_either.rs
new file mode 100644
index 0000000..a19ce53
--- /dev/null
+++ b/src/par_either.rs
@@ -0,0 +1,74 @@
+use crate::iter::plumbing::*;
+use crate::iter::Either::{Left, Right};
+use crate::iter::*;
+
+/// `Either<L, R>` is a parallel iterator if both `L` and `R` are parallel iterators.
+impl<L, R> ParallelIterator for Either<L, R>
+where
+    L: ParallelIterator,
+    R: ParallelIterator<Item = L::Item>,
+{
+    type Item = L::Item;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.drive_unindexed(consumer),
+            Right(iter) => iter.drive_unindexed(consumer),
+        }
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        self.as_ref().either(L::opt_len, R::opt_len)
+    }
+}
+
+impl<L, R> IndexedParallelIterator for Either<L, R>
+where
+    L: IndexedParallelIterator,
+    R: IndexedParallelIterator<Item = L::Item>,
+{
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.drive(consumer),
+            Right(iter) => iter.drive(consumer),
+        }
+    }
+
+    fn len(&self) -> usize {
+        self.as_ref().either(L::len, R::len)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        match self {
+            Left(iter) => iter.with_producer(callback),
+            Right(iter) => iter.with_producer(callback),
+        }
+    }
+}
+
+/// `Either<L, R>` can be extended if both `L` and `R` are parallel extendable.
+impl<L, R, T> ParallelExtend<T> for Either<L, R>
+where
+    L: ParallelExtend<T>,
+    R: ParallelExtend<T>,
+    T: Send,
+{
+    fn par_extend<I>(&mut self, par_iter: I)
+    where
+        I: IntoParallelIterator<Item = T>,
+    {
+        match self.as_mut() {
+            Left(collection) => collection.par_extend(par_iter),
+            Right(collection) => collection.par_extend(par_iter),
+        }
+    }
+}
diff --git a/src/prelude.rs b/src/prelude.rs
new file mode 100644
index 0000000..6eaca06
--- /dev/null
+++ b/src/prelude.rs
@@ -0,0 +1,17 @@
+//! The rayon prelude imports the various `ParallelIterator` traits.
+//! The intention is that one can include `use rayon::prelude::*` and
+//! have easy access to the various traits and methods you will need.
+
+pub use crate::iter::FromParallelIterator;
+pub use crate::iter::IndexedParallelIterator;
+pub use crate::iter::IntoParallelIterator;
+pub use crate::iter::IntoParallelRefIterator;
+pub use crate::iter::IntoParallelRefMutIterator;
+pub use crate::iter::ParallelBridge;
+pub use crate::iter::ParallelDrainFull;
+pub use crate::iter::ParallelDrainRange;
+pub use crate::iter::ParallelExtend;
+pub use crate::iter::ParallelIterator;
+pub use crate::slice::ParallelSlice;
+pub use crate::slice::ParallelSliceMut;
+pub use crate::str::ParallelString;
diff --git a/src/private.rs b/src/private.rs
new file mode 100644
index 0000000..c85e77b
--- /dev/null
+++ b/src/private.rs
@@ -0,0 +1,26 @@
+//! The public parts of this private module are used to create traits
+//! that cannot be implemented outside of our own crate.  This way we
+//! can feel free to extend those traits without worrying about it
+//! being a breaking change for other implementations.
+
+/// If this type is pub but not publicly reachable, third parties
+/// can't name it and can't implement traits using it.
+#[allow(missing_debug_implementations)]
+pub struct PrivateMarker;
+
+macro_rules! private_decl {
+    () => {
+        /// This trait is private; this method exists to make it
+        /// impossible to implement outside the crate.
+        #[doc(hidden)]
+        fn __rayon_private__(&self) -> crate::private::PrivateMarker;
+    };
+}
+
+macro_rules! private_impl {
+    () => {
+        fn __rayon_private__(&self) -> crate::private::PrivateMarker {
+            crate::private::PrivateMarker
+        }
+    };
+}
diff --git a/src/range.rs b/src/range.rs
new file mode 100644
index 0000000..09ba25e
--- /dev/null
+++ b/src/range.rs
@@ -0,0 +1,368 @@
+//! Parallel iterator types for [ranges][std::range],
+//! the type for values created by `a..b` expressions
+//!
+//! You will rarely need to interact with this module directly unless you have
+//! need to name one of the iterator types.
+//!
+//! ```
+//! use rayon::prelude::*;
+//!
+//! let r = (0..100u64).into_par_iter()
+//!                    .sum();
+//!
+//! // compare result with sequential calculation
+//! assert_eq!((0..100).sum::<u64>(), r);
+//! ```
+//!
+//! [std::range]: https://doc.rust-lang.org/core/ops/struct.Range.html
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::char;
+use std::ops::Range;
+use std::usize;
+
+/// Parallel iterator over a range, implemented for all integer types.
+///
+/// **Note:** The `zip` operation requires `IndexedParallelIterator`
+/// which is not implemented for `u64`, `i64`, `u128`, or `i128`.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let p = (0..25usize).into_par_iter()
+///                   .zip(0..25usize)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum::<usize>();
+///
+/// let s = (0..25usize).zip(0..25)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum();
+///
+/// assert_eq!(p, s);
+/// ```
+#[derive(Debug, Clone)]
+pub struct Iter<T> {
+    range: Range<T>,
+}
+
+impl<T> IntoParallelIterator for Range<T>
+where
+    Iter<T>: ParallelIterator,
+{
+    type Item = <Iter<T> as ParallelIterator>::Item;
+    type Iter = Iter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { range: self }
+    }
+}
+
+struct IterProducer<T> {
+    range: Range<T>,
+}
+
+impl<T> IntoIterator for IterProducer<T>
+where
+    Range<T>: Iterator,
+{
+    type Item = <Range<T> as Iterator>::Item;
+    type IntoIter = Range<T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.range
+    }
+}
+
+macro_rules! indexed_range_impl {
+    ( $t:ty ) => {
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                bridge(self, consumer)
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                Some(self.len())
+            }
+        }
+
+        impl IndexedParallelIterator for Iter<$t> {
+            fn drive<C>(self, consumer: C) -> C::Result
+            where
+                C: Consumer<Self::Item>,
+            {
+                bridge(self, consumer)
+            }
+
+            fn len(&self) -> usize {
+                self.range.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+            where
+                CB: ProducerCallback<Self::Item>,
+            {
+                callback.callback(IterProducer { range: self.range })
+            }
+        }
+
+        impl Producer for IterProducer<$t> {
+            type Item = <Range<$t> as Iterator>::Item;
+            type IntoIter = Range<$t>;
+            fn into_iter(self) -> Self::IntoIter {
+                self.range
+            }
+
+            fn split_at(self, index: usize) -> (Self, Self) {
+                assert!(index <= self.range.len());
+                // For signed $t, the length and requested index could be greater than $t::MAX, and
+                // then `index as $t` could wrap to negative, so wrapping_add is necessary.
+                let mid = self.range.start.wrapping_add(index as $t);
+                let left = self.range.start..mid;
+                let right = mid..self.range.end;
+                (IterProducer { range: left }, IterProducer { range: right })
+            }
+        }
+    };
+}
+
+trait UnindexedRangeLen<L> {
+    fn len(&self) -> L;
+}
+
+macro_rules! unindexed_range_impl {
+    ( $t:ty, $len_t:ty ) => {
+        impl UnindexedRangeLen<$len_t> for Range<$t> {
+            fn len(&self) -> $len_t {
+                let &Range { start, end } = self;
+                if end > start {
+                    end.wrapping_sub(start) as $len_t
+                } else {
+                    0
+                }
+            }
+        }
+
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                #[inline]
+                fn offset(start: $t) -> impl Fn(usize) -> $t {
+                    move |i| start.wrapping_add(i as $t)
+                }
+
+                if let Some(len) = self.opt_len() {
+                    // Drive this in indexed mode for better `collect`.
+                    (0..len)
+                        .into_par_iter()
+                        .map(offset(self.range.start))
+                        .drive(consumer)
+                } else {
+                    bridge_unindexed(IterProducer { range: self.range }, consumer)
+                }
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                let len = self.range.len();
+                if len <= usize::MAX as $len_t {
+                    Some(len as usize)
+                } else {
+                    None
+                }
+            }
+        }
+
+        impl UnindexedProducer for IterProducer<$t> {
+            type Item = $t;
+
+            fn split(mut self) -> (Self, Option<Self>) {
+                let index = self.range.len() / 2;
+                if index > 0 {
+                    let mid = self.range.start.wrapping_add(index as $t);
+                    let right = mid..self.range.end;
+                    self.range.end = mid;
+                    (self, Some(IterProducer { range: right }))
+                } else {
+                    (self, None)
+                }
+            }
+
+            fn fold_with<F>(self, folder: F) -> F
+            where
+                F: Folder<Self::Item>,
+            {
+                folder.consume_iter(self)
+            }
+        }
+    };
+}
+
+// all Range<T> with ExactSizeIterator
+indexed_range_impl! {u8}
+indexed_range_impl! {u16}
+indexed_range_impl! {u32}
+indexed_range_impl! {usize}
+indexed_range_impl! {i8}
+indexed_range_impl! {i16}
+indexed_range_impl! {i32}
+indexed_range_impl! {isize}
+
+// other Range<T> with just Iterator
+unindexed_range_impl! {u64, u64}
+unindexed_range_impl! {i64, u64}
+unindexed_range_impl! {u128, u128}
+unindexed_range_impl! {i128, u128}
+
+// char is special because of the surrogate range hole
+macro_rules! convert_char {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {{
+        let start = $self.range.start as u32;
+        let end = $self.range.end as u32;
+        if start < 0xD800 && 0xE000 < end {
+            // chain the before and after surrogate range fragments
+            (start..0xD800)
+                .into_par_iter()
+                .chain(0xE000..end)
+                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                .$method($( $arg ),*)
+        } else {
+            // no surrogate range to worry about
+            (start..end)
+                .into_par_iter()
+                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                .$method($( $arg ),*)
+        }
+    }};
+}
+
+impl ParallelIterator for Iter<char> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl IndexedParallelIterator for Iter<char> {
+    // Split at the surrogate range first if we're allowed to
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn len(&self) -> usize {
+        // Taken from <char as Step>::steps_between
+        let start = self.range.start as u32;
+        let end = self.range.end as u32;
+        if start < end {
+            let mut count = end - start;
+            if start < 0xD800 && 0xE000 <= end {
+                count -= 0x800
+            }
+            count as usize
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        convert_char!(self.with_producer(callback))
+    }
+}
+
+#[test]
+fn check_range_split_at_overflow() {
+    // Note, this split index overflows i8!
+    let producer = IterProducer { range: -100i8..100 };
+    let (left, right) = producer.split_at(150);
+    let r1: i32 = left.range.map(i32::from).sum();
+    let r2: i32 = right.range.map(i32::from).sum();
+    assert_eq!(r1 + r2, -100);
+}
+
+#[test]
+fn test_i128_len_doesnt_overflow() {
+    use std::{i128, u128};
+
+    // Using parse because some versions of rust don't allow long literals
+    let octillion: i128 = "1000000000000000000000000000".parse().unwrap();
+    let producer = IterProducer {
+        range: 0..octillion,
+    };
+
+    assert_eq!(octillion as u128, producer.range.len());
+    assert_eq!(octillion as u128, (0..octillion).len());
+    assert_eq!(2 * octillion as u128, (-octillion..octillion).len());
+
+    assert_eq!(u128::MAX, (i128::MIN..i128::MAX).len());
+}
+
+#[test]
+fn test_u64_opt_len() {
+    use std::{u64, usize};
+    assert_eq!(Some(100), (0..100u64).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..usize::MAX as u64).into_par_iter().opt_len()
+    );
+    if (usize::MAX as u64) < u64::MAX {
+        assert_eq!(
+            None,
+            (0..(usize::MAX as u64).wrapping_add(1))
+                .into_par_iter()
+                .opt_len()
+        );
+        assert_eq!(None, (0..u64::MAX).into_par_iter().opt_len());
+    }
+}
+
+#[test]
+fn test_u128_opt_len() {
+    use std::{u128, usize};
+    assert_eq!(Some(100), (0..100u128).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..usize::MAX as u128).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..1 + usize::MAX as u128).into_par_iter().opt_len());
+    assert_eq!(None, (0..u128::MAX).into_par_iter().opt_len());
+}
+
+// `usize as i64` can overflow, so make sure to wrap it appropriately
+// when using the `opt_len` "indexed" mode.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_usize_i64_overflow() {
+    use crate::ThreadPoolBuilder;
+    use std::i64;
+
+    let iter = (-2..i64::MAX).into_par_iter();
+    assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 2));
+
+    // always run with multiple threads to split into, or this will take forever...
+    let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap();
+    pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX - 1)));
+}
diff --git a/src/range_inclusive.rs b/src/range_inclusive.rs
new file mode 100644
index 0000000..c802b6c
--- /dev/null
+++ b/src/range_inclusive.rs
@@ -0,0 +1,288 @@
+//! Parallel iterator types for [inclusive ranges][std::range],
+//! the type for values created by `a..=b` expressions
+//!
+//! You will rarely need to interact with this module directly unless you have
+//! need to name one of the iterator types.
+//!
+//! ```
+//! use rayon::prelude::*;
+//!
+//! let r = (0..=100u64).into_par_iter()
+//!                     .sum();
+//!
+//! // compare result with sequential calculation
+//! assert_eq!((0..=100).sum::<u64>(), r);
+//! ```
+//!
+//! [std::range]: https://doc.rust-lang.org/core/ops/struct.RangeInclusive.html
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::char;
+use std::ops::RangeInclusive;
+
+/// Parallel iterator over an inclusive range, implemented for all integer types.
+///
+/// **Note:** The `zip` operation requires `IndexedParallelIterator`
+/// which is only implemented for `u8`, `i8`, `u16`, and `i16`.
+///
+/// ```
+/// use rayon::prelude::*;
+///
+/// let p = (0..=25u16).into_par_iter()
+///                   .zip(0..=25u16)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum::<u16>();
+///
+/// let s = (0..=25u16).zip(0..=25u16)
+///                   .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0)
+///                   .map(|(x, y)| x * y)
+///                   .sum();
+///
+/// assert_eq!(p, s);
+/// ```
+#[derive(Debug, Clone)]
+pub struct Iter<T> {
+    range: RangeInclusive<T>,
+}
+
+impl<T> Iter<T>
+where
+    RangeInclusive<T>: Eq,
+    T: Ord + Copy,
+{
+    /// Returns `Some((start, end))` for `start..=end`, or `None` if it is exhausted.
+    ///
+    /// Note that `RangeInclusive` does not specify the bounds of an exhausted iterator,
+    /// so this is a way for us to figure out what we've got.  Thankfully, all of the
+    /// integer types we care about can be trivially cloned.
+    fn bounds(&self) -> Option<(T, T)> {
+        let start = *self.range.start();
+        let end = *self.range.end();
+        if start <= end && self.range == (start..=end) {
+            // If the range is still nonempty, this is obviously true
+            // If the range is exhausted, either start > end or
+            // the range does not equal start..=end.
+            Some((start, end))
+        } else {
+            None
+        }
+    }
+}
+
+impl<T> IntoParallelIterator for RangeInclusive<T>
+where
+    Iter<T>: ParallelIterator,
+{
+    type Item = <Iter<T> as ParallelIterator>::Item;
+    type Iter = Iter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { range: self }
+    }
+}
+
+macro_rules! convert {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {
+        if let Some((start, end)) = $self.bounds() {
+            if let Some(end) = end.checked_add(1) {
+                (start..end).into_par_iter().$method($( $arg ),*)
+            } else {
+                (start..end).into_par_iter().chain(once(end)).$method($( $arg ),*)
+            }
+        } else {
+            empty::<Self::Item>().$method($( $arg ),*)
+        }
+    };
+}
+
+macro_rules! parallel_range_impl {
+    ( $t:ty ) => {
+        impl ParallelIterator for Iter<$t> {
+            type Item = $t;
+
+            fn drive_unindexed<C>(self, consumer: C) -> C::Result
+            where
+                C: UnindexedConsumer<Self::Item>,
+            {
+                convert!(self.drive_unindexed(consumer))
+            }
+
+            fn opt_len(&self) -> Option<usize> {
+                convert!(self.opt_len())
+            }
+        }
+    };
+}
+
+macro_rules! indexed_range_impl {
+    ( $t:ty ) => {
+        parallel_range_impl! { $t }
+
+        impl IndexedParallelIterator for Iter<$t> {
+            fn drive<C>(self, consumer: C) -> C::Result
+            where
+                C: Consumer<Self::Item>,
+            {
+                convert!(self.drive(consumer))
+            }
+
+            fn len(&self) -> usize {
+                self.range.len()
+            }
+
+            fn with_producer<CB>(self, callback: CB) -> CB::Output
+            where
+                CB: ProducerCallback<Self::Item>,
+            {
+                convert!(self.with_producer(callback))
+            }
+        }
+    };
+}
+
+// all RangeInclusive<T> with ExactSizeIterator
+indexed_range_impl! {u8}
+indexed_range_impl! {u16}
+indexed_range_impl! {i8}
+indexed_range_impl! {i16}
+
+// other RangeInclusive<T> with just Iterator
+parallel_range_impl! {usize}
+parallel_range_impl! {isize}
+parallel_range_impl! {u32}
+parallel_range_impl! {i32}
+parallel_range_impl! {u64}
+parallel_range_impl! {i64}
+parallel_range_impl! {u128}
+parallel_range_impl! {i128}
+
+// char is special
+macro_rules! convert_char {
+    ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => {
+        if let Some((start, end)) = $self.bounds() {
+            let start = start as u32;
+            let end = end as u32;
+            if start < 0xD800 && 0xE000 <= end {
+                // chain the before and after surrogate range fragments
+                (start..0xD800)
+                    .into_par_iter()
+                    .chain(0xE000..end + 1) // cannot use RangeInclusive, so add one to end
+                    .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                    .$method($( $arg ),*)
+            } else {
+                // no surrogate range to worry about
+                (start..end + 1) // cannot use RangeInclusive, so add one to end
+                    .into_par_iter()
+                    .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
+                    .$method($( $arg ),*)
+            }
+        } else {
+            empty().into_par_iter().$method($( $arg ),*)
+        }
+    };
+}
+
+impl ParallelIterator for Iter<char> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+// Range<u32> is broken on 16 bit platforms, may as well benefit from it
+impl IndexedParallelIterator for Iter<char> {
+    // Split at the surrogate range first if we're allowed to
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        convert_char!(self.drive(consumer))
+    }
+
+    fn len(&self) -> usize {
+        if let Some((start, end)) = self.bounds() {
+            // Taken from <char as Step>::steps_between
+            let start = start as u32;
+            let end = end as u32;
+            let mut count = end - start;
+            if start < 0xD800 && 0xE000 <= end {
+                count -= 0x800
+            }
+            (count + 1) as usize // add one for inclusive
+        } else {
+            0
+        }
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        convert_char!(self.with_producer(callback))
+    }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_u32_opt_len() {
+    use std::u32;
+    assert_eq!(Some(101), (0..=100u32).into_par_iter().opt_len());
+    assert_eq!(
+        Some(u32::MAX as usize),
+        (0..=u32::MAX - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(
+        Some(u32::MAX as usize + 1),
+        (0..=u32::MAX).into_par_iter().opt_len()
+    );
+}
+
+#[test]
+fn test_u64_opt_len() {
+    use std::{u64, usize};
+    assert_eq!(Some(101), (0..=100u64).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..=usize::MAX as u64 - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..=usize::MAX as u64).into_par_iter().opt_len());
+    assert_eq!(None, (0..=u64::MAX).into_par_iter().opt_len());
+}
+
+#[test]
+fn test_u128_opt_len() {
+    use std::{u128, usize};
+    assert_eq!(Some(101), (0..=100u128).into_par_iter().opt_len());
+    assert_eq!(
+        Some(usize::MAX),
+        (0..=usize::MAX as u128 - 1).into_par_iter().opt_len()
+    );
+    assert_eq!(None, (0..=usize::MAX as u128).into_par_iter().opt_len());
+    assert_eq!(None, (0..=u128::MAX).into_par_iter().opt_len());
+}
+
+// `usize as i64` can overflow, so make sure to wrap it appropriately
+// when using the `opt_len` "indexed" mode.
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_usize_i64_overflow() {
+    use crate::ThreadPoolBuilder;
+    use std::i64;
+
+    let iter = (-2..=i64::MAX).into_par_iter();
+    assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 3));
+
+    // always run with multiple threads to split into, or this will take forever...
+    let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap();
+    pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX)));
+}
diff --git a/src/result.rs b/src/result.rs
new file mode 100644
index 0000000..43685ca
--- /dev/null
+++ b/src/result.rs
@@ -0,0 +1,132 @@
+//! Parallel iterator types for [results][std::result]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::result]: https://doc.rust-lang.org/stable/std/result/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::sync::Mutex;
+
+use crate::option;
+
+/// Parallel iterator over a result
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: option::IntoIter<T>,
+}
+
+impl<T: Send, E> IntoParallelIterator for Result<T, E> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter {
+            inner: self.ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a result
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: option::IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync, E> IntoParallelIterator for &'a Result<T, E> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter {
+            inner: self.as_ref().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a result
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: option::IntoIter<&'a mut T>,
+}
+
+impl<'a, T: Send, E> IntoParallelIterator for &'a mut Result<T, E> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut {
+            inner: self.as_mut().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Collect an arbitrary `Result`-wrapped collection.
+///
+/// If any item is `Err`, then all previous `Ok` items collected are
+/// discarded, and it returns that error.  If there are multiple errors, the
+/// one returned is not deterministic.
+impl<C, T, E> FromParallelIterator<Result<T, E>> for Result<C, E>
+where
+    C: FromParallelIterator<T>,
+    T: Send,
+    E: Send,
+{
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = Result<T, E>>,
+    {
+        fn ok<T, E>(saved: &Mutex<Option<E>>) -> impl Fn(Result<T, E>) -> Option<T> + '_ {
+            move |item| match item {
+                Ok(item) => Some(item),
+                Err(error) => {
+                    // We don't need a blocking `lock()`, as anybody
+                    // else holding the lock will also be writing
+                    // `Some(error)`, and then ours is irrelevant.
+                    if let Ok(mut guard) = saved.try_lock() {
+                        if guard.is_none() {
+                            *guard = Some(error);
+                        }
+                    }
+                    None
+                }
+            }
+        }
+
+        let saved_error = Mutex::new(None);
+        let collection = par_iter
+            .into_par_iter()
+            .map(ok(&saved_error))
+            .while_some()
+            .collect();
+
+        match saved_error.into_inner().unwrap() {
+            Some(error) => Err(error),
+            None => Ok(collection),
+        }
+    }
+}
diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs
new file mode 100644
index 0000000..a007cae
--- /dev/null
+++ b/src/slice/mergesort.rs
@@ -0,0 +1,763 @@
+//! Parallel merge sort.
+//!
+//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
+//! The only difference from the original is that the sequential `mergesort` returns
+//! `MergesortResult` and leaves descending arrays intact.
+
+use crate::iter::*;
+use crate::slice::ParallelSliceMut;
+use std::mem;
+use std::mem::size_of;
+use std::ptr;
+use std::slice;
+
+unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
+    let old = *ptr;
+    *ptr = ptr.offset(1);
+    old
+}
+
+unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
+    *ptr = ptr.offset(-1);
+    *ptr
+}
+
+/// When dropped, copies from `src` into `dest` a sequence of length `len`.
+struct CopyOnDrop<T> {
+    src: *mut T,
+    dest: *mut T,
+    len: usize,
+}
+
+impl<T> Drop for CopyOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
+        }
+    }
+}
+
+/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
+///
+/// This is the integral subroutine of insertion sort.
+fn insert_head<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    if v.len() >= 2 && is_less(&v[1], &v[0]) {
+        unsafe {
+            // There are three ways to implement insertion here:
+            //
+            // 1. Swap adjacent elements until the first one gets to its final destination.
+            //    However, this way we copy data around more than is necessary. If elements are big
+            //    structures (costly to copy), this method will be slow.
+            //
+            // 2. Iterate until the right place for the first element is found. Then shift the
+            //    elements succeeding it to make room for it and finally place it into the
+            //    remaining hole. This is a good method.
+            //
+            // 3. Copy the first element into a temporary variable. Iterate until the right place
+            //    for it is found. As we go along, copy every traversed element into the slot
+            //    preceding it. Finally, copy data from the temporary variable into the remaining
+            //    hole. This method is very good. Benchmarks demonstrated slightly better
+            //    performance than with the 2nd method.
+            //
+            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(&v[0])),
+            };
+
+            // Intermediate state of the insertion process is always tracked by `hole`, which
+            // serves two purposes:
+            // 1. Protects integrity of `v` from panics in `is_less`.
+            // 2. Fills the remaining hole in `v` in the end.
+            //
+            // Panic safety:
+            //
+            // If `is_less` panics at any point during the process, `hole` will get dropped and
+            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+            // initially held exactly once.
+            let mut hole = InsertionHole {
+                src: tmp.value.as_mut().unwrap(),
+                dest: &mut v[1],
+            };
+            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
+
+            for i in 2..v.len() {
+                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
+                    break;
+                }
+                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
+                hole.dest = &mut v[i];
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+
+    // Holds a value, but never drops it.
+    struct NoDrop<T> {
+        value: Option<T>,
+    }
+
+    impl<T> Drop for NoDrop<T> {
+        fn drop(&mut self) {
+            mem::forget(self.value.take());
+        }
+    }
+
+    // When dropped, copies from `src` into `dest`.
+    struct InsertionHole<T> {
+        src: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for InsertionHole<T> {
+        fn drop(&mut self) {
+            unsafe {
+                ptr::copy_nonoverlapping(self.src, self.dest, 1);
+            }
+        }
+    }
+}
+
+/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
+/// stores the result into `v[..]`.
+///
+/// # Safety
+///
+/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
+/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
+unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    let v = v.as_mut_ptr();
+    let v_mid = v.add(mid);
+    let v_end = v.add(len);
+
+    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
+    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
+    // copying the lesser (or greater) one into `v`.
+    //
+    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
+    // consumed first, then we must copy whatever is left of the shorter run into the remaining
+    // hole in `v`.
+    //
+    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
+    // 1. Protects integrity of `v` from panics in `is_less`.
+    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
+    //
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
+    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
+    // object it initially held exactly once.
+    let mut hole;
+
+    if mid <= len - mid {
+        // The left run is shorter.
+        ptr::copy_nonoverlapping(v, buf, mid);
+        hole = MergeHole {
+            start: buf,
+            end: buf.add(mid),
+            dest: v,
+        };
+
+        // Initially, these pointers point to the beginnings of their arrays.
+        let left = &mut hole.start;
+        let mut right = v_mid;
+        let out = &mut hole.dest;
+
+        while *left < hole.end && right < v_end {
+            // Consume the lesser side.
+            // If equal, prefer the left run to maintain stability.
+            let to_copy = if is_less(&*right, &**left) {
+                get_and_increment(&mut right)
+            } else {
+                get_and_increment(left)
+            };
+            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+        }
+    } else {
+        // The right run is shorter.
+        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
+        hole = MergeHole {
+            start: buf,
+            end: buf.add(len - mid),
+            dest: v_mid,
+        };
+
+        // Initially, these pointers point past the ends of their arrays.
+        let left = &mut hole.dest;
+        let right = &mut hole.end;
+        let mut out = v_end;
+
+        while v < *left && buf < *right {
+            // Consume the greater side.
+            // If equal, prefer the right run to maintain stability.
+            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
+                decrement_and_get(left)
+            } else {
+                decrement_and_get(right)
+            };
+            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+        }
+    }
+    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
+    // it will now be copied into the hole in `v`.
+
+    // When dropped, copies the range `start..end` into `dest..`.
+    struct MergeHole<T> {
+        start: *mut T,
+        end: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for MergeHole<T> {
+        fn drop(&mut self) {
+            // `T` is not a zero-sized type, so it's okay to divide by its size.
+            let len = (self.end as usize - self.start as usize) / size_of::<T>();
+            unsafe {
+                ptr::copy_nonoverlapping(self.start, self.dest, len);
+            }
+        }
+    }
+}
+
+/// The result of merge sort.
+#[must_use]
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum MergesortResult {
+    /// The slice has already been sorted.
+    NonDescending,
+    /// The slice has been descending and therefore it was left intact.
+    Descending,
+    /// The slice was sorted.
+    Sorted,
+}
+
+/// A sorted run that starts at index `start` and is of length `len`.
+#[derive(Clone, Copy)]
+struct Run {
+    start: usize,
+    len: usize,
+}
+
+/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
+/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
+/// algorithm should continue building a new run instead, `None` is returned.
+///
+/// TimSort is infamous for its buggy implementations, as described here:
+/// http://envisage-project.eu/timsort-specification-and-verification/
+///
+/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
+/// Enforcing them on just top three is not sufficient to ensure that the invariants will still
+/// hold for *all* runs in the stack.
+///
+/// This function correctly checks invariants for the top four runs. Additionally, if the top
+/// run starts at index 0, it will always demand a merge operation until the stack is fully
+/// collapsed, in order to complete the sort.
+#[inline]
+fn collapse(runs: &[Run]) -> Option<usize> {
+    let n = runs.len();
+
+    if n >= 2
+        && (runs[n - 1].start == 0
+            || runs[n - 2].len <= runs[n - 1].len
+            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
+            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
+    {
+        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
+            Some(n - 3)
+        } else {
+            Some(n - 2)
+        }
+    } else {
+        None
+    }
+}
+
+/// Sorts a slice using merge sort, unless it is already in descending order.
+///
+/// This function doesn't modify the slice if it is already non-descending or descending.
+/// Otherwise, it sorts the slice into non-descending order.
+///
+/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
+/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt).
+///
+/// The algorithm identifies strictly descending and non-descending subsequences, which are called
+/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
+/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
+/// satisfied:
+///
+/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
+/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
+///
+/// The invariants ensure that the total running time is `O(n log n)` worst-case.
+///
+/// # Safety
+///
+/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
+unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Very short runs are extended using insertion sort to span at least this many elements.
+    const MIN_RUN: usize = 10;
+
+    let len = v.len();
+
+    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
+    // strange decision, but consider the fact that merges more often go in the opposite direction
+    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
+    // backwards. To conclude, identifying runs by traversing backwards improves performance.
+    let mut runs = vec![];
+    let mut end = len;
+    while end > 0 {
+        // Find the next natural run, and reverse it if it's strictly descending.
+        let mut start = end - 1;
+
+        if start > 0 {
+            start -= 1;
+
+            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
+                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this descending run covers the whole slice, return immediately.
+                if start == 0 && end == len {
+                    return MergesortResult::Descending;
+                } else {
+                    v[start..end].reverse();
+                }
+            } else {
+                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this non-descending run covers the whole slice, return immediately.
+                if end - start == len {
+                    return MergesortResult::NonDescending;
+                }
+            }
+        }
+
+        // Insert some more elements into the run if it's too short. Insertion sort is faster than
+        // merge sort on short sequences, so this significantly improves performance.
+        while start > 0 && end - start < MIN_RUN {
+            start -= 1;
+            insert_head(&mut v[start..end], &is_less);
+        }
+
+        // Push this run onto the stack.
+        runs.push(Run {
+            start,
+            len: end - start,
+        });
+        end = start;
+
+        // Merge some pairs of adjacent runs to satisfy the invariants.
+        while let Some(r) = collapse(&runs) {
+            let left = runs[r + 1];
+            let right = runs[r];
+            merge(
+                &mut v[left.start..right.start + right.len],
+                left.len,
+                buf,
+                &is_less,
+            );
+
+            runs[r] = Run {
+                start: left.start,
+                len: left.len + right.len,
+            };
+            runs.remove(r + 1);
+        }
+    }
+
+    // Finally, exactly one run must remain in the stack.
+    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+
+    // The original order of the slice was neither non-descending nor descending.
+    MergesortResult::Sorted
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
+// Everything below this line is parallelization.
+////////////////////////////////////////////////////////////////////////////
+
+/// Splits two sorted slices so that they can be merged in parallel.
+///
+/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
+/// `left[a..]` and `right[b..]`.
+fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let left_len = left.len();
+    let right_len = right.len();
+
+    if left_len >= right_len {
+        let left_mid = left_len / 2;
+
+        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
+        let mut a = 0;
+        let mut b = right_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[m], &left[left_mid]) {
+                a = m + 1;
+            } else {
+                b = m;
+            }
+        }
+
+        (left_mid, a)
+    } else {
+        let right_mid = right_len / 2;
+
+        // Find the first element in `left` that is greater than `right[right_mid]`.
+        let mut a = 0;
+        let mut b = left_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[right_mid], &left[m]) {
+                b = m;
+            } else {
+                a = m + 1;
+            }
+        }
+
+        (a, right_mid)
+    }
+}
+
+/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
+///
+/// # Safety
+///
+/// The `dest` pointer must have enough space to store the result.
+///
+/// Even if `is_less` panics at any point during the merge process, this function will fully copy
+/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
+unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
+    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
+    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
+    // scheduling.
+    const MAX_SEQUENTIAL: usize = 5000;
+
+    let left_len = left.len();
+    let right_len = right.len();
+
+    // Intermediate state of the merge process, which serves two purposes:
+    // 1. Protects integrity of `dest` from panics in `is_less`.
+    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
+    //
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
+    // remaining parts of `left` and `right` into `dest`.
+    let mut s = State {
+        left_start: left.as_mut_ptr(),
+        left_end: left.as_mut_ptr().add(left_len),
+        right_start: right.as_mut_ptr(),
+        right_end: right.as_mut_ptr().add(right_len),
+        dest,
+    };
+
+    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
+        while s.left_start < s.left_end && s.right_start < s.right_end {
+            // Consume the lesser side.
+            // If equal, prefer the left run to maintain stability.
+            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
+                get_and_increment(&mut s.right_start)
+            } else {
+                get_and_increment(&mut s.left_start)
+            };
+            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
+        }
+    } else {
+        // Function `split_for_merge` might panic. If that happens, `s` will get destructed and copy
+        // the whole `left` and `right` into `dest`.
+        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
+        let (left_l, left_r) = left.split_at_mut(left_mid);
+        let (right_l, right_r) = right.split_at_mut(right_mid);
+
+        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
+        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
+        // get copied into `dest_left` and `dest_right``.
+        mem::forget(s);
+
+        // Convert the pointers to `usize` because `*mut T` is not `Send`.
+        let dest_l = dest as usize;
+        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
+        rayon_core::join(
+            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
+            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
+        );
+    }
+    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
+    // all at once.
+
+    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
+    // in that order.
+    struct State<T> {
+        left_start: *mut T,
+        left_end: *mut T,
+        right_start: *mut T,
+        right_end: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for State<T> {
+        fn drop(&mut self) {
+            let size = size_of::<T>();
+            let left_len = (self.left_end as usize - self.left_start as usize) / size;
+            let right_len = (self.right_end as usize - self.right_start as usize) / size;
+
+            // Copy array `left`, followed by `right`.
+            unsafe {
+                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
+                self.dest = self.dest.add(left_len);
+                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
+            }
+        }
+    }
+}
+
+/// Recursively merges pre-sorted chunks inside `v`.
+///
+/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
+/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
+/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
+///
+/// # Safety
+///
+/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
+/// must equal the left bound of the following chunk.
+///
+/// The buffer must be at least as long as `v`.
+unsafe fn recurse<T, F>(
+    v: *mut T,
+    buf: *mut T,
+    chunks: &[(usize, usize)],
+    into_buf: bool,
+    is_less: &F,
+) where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    let len = chunks.len();
+    debug_assert!(len > 0);
+
+    // Base case of the algorithm.
+    // If only one chunk is remaining, there's no more work to split and merge.
+    if len == 1 {
+        if into_buf {
+            // Copy the chunk from `v` into `buf`.
+            let (start, end) = chunks[0];
+            let src = v.add(start);
+            let dest = buf.add(start);
+            ptr::copy_nonoverlapping(src, dest, end - start);
+        }
+        return;
+    }
+
+    // Split the chunks into two halves.
+    let (start, _) = chunks[0];
+    let (mid, _) = chunks[len / 2];
+    let (_, end) = chunks[len - 1];
+    let (left, right) = chunks.split_at(len / 2);
+
+    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
+    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
+    // merge chunks from `v` into `buf`, and viceversa.
+    //
+    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
+    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
+    // level etc.
+    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };
+
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
+    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
+    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
+    let guard = CopyOnDrop {
+        src: src.add(start),
+        dest: dest.add(start),
+        len: end - start,
+    };
+
+    // Convert the pointers to `usize` because `*mut T` is not `Send`.
+    let v = v as usize;
+    let buf = buf as usize;
+    rayon_core::join(
+        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
+        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
+    );
+
+    // Everything went all right - recursive calls didn't panic.
+    // Forget the guard in order to prevent its destructor from running.
+    mem::forget(guard);
+
+    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
+    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
+    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
+    par_merge(src_left, src_right, dest.add(start), is_less);
+}
+
+/// Sorts `v` using merge sort in parallel.
+///
+/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
+/// The allocated temporary buffer is of the same length as is `v`.
+pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
+    // buffer allocation.
+    const MAX_INSERTION: usize = 20;
+    // The length of initial chunks. This number is as small as possible but so that the overhead
+    // of Rayon's task scheduling is still negligible.
+    const CHUNK_LENGTH: usize = 2000;
+
+    // Sorting has no meaningful behavior on zero-sized types.
+    if size_of::<T>() == 0 {
+        return;
+    }
+
+    let len = v.len();
+
+    // Short slices get sorted in-place via insertion sort to avoid allocations.
+    if len <= MAX_INSERTION {
+        if len >= 2 {
+            for i in (0..len - 1).rev() {
+                insert_head(&mut v[i..], &is_less);
+            }
+        }
+        return;
+    }
+
+    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
+    // shallow copies of the contents of `v` without risking the dtors running on copies if
+    // `is_less` panics.
+    let mut buf = Vec::<T>::with_capacity(len);
+    let buf = buf.as_mut_ptr();
+
+    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
+    if len <= CHUNK_LENGTH {
+        let res = unsafe { mergesort(v, buf, &is_less) };
+        if res == MergesortResult::Descending {
+            v.reverse();
+        }
+        return;
+    }
+
+    // Split the slice into chunks and merge sort them in parallel.
+    // However, descending chunks will not be sorted - they will be simply left intact.
+    let mut iter = {
+        // Convert the pointer to `usize` because `*mut T` is not `Send`.
+        let buf = buf as usize;
+
+        v.par_chunks_mut(CHUNK_LENGTH)
+            .with_max_len(1)
+            .enumerate()
+            .map(|(i, chunk)| {
+                let l = CHUNK_LENGTH * i;
+                let r = l + chunk.len();
+                unsafe {
+                    let buf = (buf as *mut T).add(l);
+                    (l, r, mergesort(chunk, buf, &is_less))
+                }
+            })
+            .collect::<Vec<_>>()
+            .into_iter()
+            .peekable()
+    };
+
+    // Now attempt to concatenate adjacent chunks that were left intact.
+    let mut chunks = Vec::with_capacity(iter.len());
+
+    while let Some((a, mut b, res)) = iter.next() {
+        // If this chunk was not modified by the sort procedure...
+        if res != MergesortResult::Sorted {
+            while let Some(&(x, y, r)) = iter.peek() {
+                // If the following chunk is of the same type and can be concatenated...
+                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
+                    // Concatenate them.
+                    b = y;
+                    iter.next();
+                } else {
+                    break;
+                }
+            }
+        }
+
+        // Descending chunks must be reversed.
+        if res == MergesortResult::Descending {
+            v[a..b].reverse();
+        }
+
+        chunks.push((a, b));
+    }
+
+    // All chunks are properly sorted.
+    // Now we just have to merge them together.
+    unsafe {
+        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::split_for_merge;
+    use rand::distributions::Uniform;
+    use rand::{thread_rng, Rng};
+
+    #[test]
+    fn test_split_for_merge() {
+        fn check(left: &[u32], right: &[u32]) {
+            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
+            assert!(left[..l]
+                .iter()
+                .all(|&x| right[r..].iter().all(|&y| x <= y)));
+            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
+        }
+
+        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
+        check(&[1, 2, 2, 2, 2, 3], &[]);
+        check(&[], &[1, 2, 2, 2, 2, 3]);
+
+        let mut rng = thread_rng();
+
+        for _ in 0..100 {
+            let limit: u32 = rng.gen_range(1, 21);
+            let left_len: usize = rng.gen_range(0, 20);
+            let right_len: usize = rng.gen_range(0, 20);
+
+            let mut left = rng
+                .sample_iter(&Uniform::new(0, limit))
+                .take(left_len)
+                .collect::<Vec<_>>();
+            let mut right = rng
+                .sample_iter(&Uniform::new(0, limit))
+                .take(right_len)
+                .collect::<Vec<_>>();
+
+            left.sort();
+            right.sort();
+            check(&left, &right);
+        }
+    }
+}
diff --git a/src/slice/mod.rs b/src/slice/mod.rs
new file mode 100644
index 0000000..b80125f
--- /dev/null
+++ b/src/slice/mod.rs
@@ -0,0 +1,1203 @@
+//! Parallel iterator types for [slices][std::slice]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
+
+mod mergesort;
+mod quicksort;
+
+mod test;
+
+use self::mergesort::par_mergesort;
+use self::quicksort::par_quicksort;
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::split_producer::*;
+use std::cmp;
+use std::cmp::Ordering;
+use std::fmt::{self, Debug};
+
+use super::math::div_round_up;
+
+/// Parallel extensions for slices.
+pub trait ParallelSlice<T: Sync> {
+    /// Returns a plain slice, which is used to implement the rest of the
+    /// parallel methods.
+    fn as_parallel_slice(&self) -> &[T];
+
+    /// Returns a parallel iterator over subslices separated by elements that
+    /// match the separator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let smallest = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
+    ///     .par_split(|i| *i == 0)
+    ///     .map(|numbers| numbers.iter().min().unwrap())
+    ///     .min();
+    /// assert_eq!(Some(&1), smallest);
+    /// ```
+    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
+    where
+        P: Fn(&T) -> bool + Sync + Send,
+    {
+        Split {
+            slice: self.as_parallel_slice(),
+            separator,
+        }
+    }
+
+    /// Returns a parallel iterator over all contiguous windows of length
+    /// `window_size`. The windows overlap.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
+    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
+    /// ```
+    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
+        Windows {
+            window_size,
+            slice: self.as_parallel_slice(),
+        }
+    }
+
+    /// Returns a parallel iterator over at most `chunk_size` elements of
+    /// `self` at a time. The chunks do not overlap.
+    ///
+    /// If the number of elements in the iterator is not divisible by
+    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
+    /// other chunks will have that exact length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
+    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
+    /// ```
+    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        Chunks {
+            chunk_size,
+            slice: self.as_parallel_slice(),
+        }
+    }
+
+    /// Returns a parallel iterator over `chunk_size` elements of
+    /// `self` at a time. The chunks do not overlap.
+    ///
+    /// If `chunk_size` does not divide the length of the slice, then the
+    /// last up to `chunk_size-1` elements will be omitted and can be
+    /// retrieved from the remainder function of the iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
+    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
+    /// ```
+    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        let slice = self.as_parallel_slice();
+        let rem = slice.len() % chunk_size;
+        let len = slice.len() - rem;
+        let (fst, snd) = slice.split_at(len);
+        ChunksExact {
+            chunk_size,
+            slice: fst,
+            rem: snd,
+        }
+    }
+}
+
+impl<T: Sync> ParallelSlice<T> for [T] {
+    #[inline]
+    fn as_parallel_slice(&self) -> &[T] {
+        self
+    }
+}
+
+/// Parallel extensions for mutable slices.
+pub trait ParallelSliceMut<T: Send> {
+    /// Returns a plain mutable slice, which is used to implement the rest of
+    /// the parallel methods.
+    fn as_parallel_slice_mut(&mut self) -> &mut [T];
+
+    /// Returns a parallel iterator over mutable subslices separated by
+    /// elements that match the separator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
+    /// array.par_split_mut(|i| *i == 0)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
+    /// ```
+    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
+    where
+        P: Fn(&T) -> bool + Sync + Send,
+    {
+        SplitMut {
+            slice: self.as_parallel_slice_mut(),
+            separator,
+        }
+    }
+
+    /// Returns a parallel iterator over at most `chunk_size` elements of
+    /// `self` at a time. The chunks are mutable and do not overlap.
+    ///
+    /// If the number of elements in the iterator is not divisible by
+    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
+    /// other chunks will have that exact length.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 4, 5];
+    /// array.par_chunks_mut(2)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [2, 1, 4, 3, 5]);
+    /// ```
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        ChunksMut {
+            chunk_size,
+            slice: self.as_parallel_slice_mut(),
+        }
+    }
+
+    /// Returns a parallel iterator over `chunk_size` elements of
+    /// `self` at a time. The chunks are mutable and do not overlap.
+    ///
+    /// If `chunk_size` does not divide the length of the slice, then the
+    /// last up to `chunk_size-1` elements will be omitted and can be
+    /// retrieved from the remainder function of the iterator.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let mut array = [1, 2, 3, 4, 5];
+    /// array.par_chunks_exact_mut(3)
+    ///      .for_each(|slice| slice.reverse());
+    /// assert_eq!(array, [3, 2, 1, 4, 5]);
+    /// ```
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
+        assert!(chunk_size != 0, "chunk_size must not be zero");
+        let slice = self.as_parallel_slice_mut();
+        let rem = slice.len() % chunk_size;
+        let len = slice.len() - rem;
+        let (fst, snd) = slice.split_at_mut(len);
+        ChunksExactMut {
+            chunk_size,
+            slice: fst,
+            rem: snd,
+        }
+    }
+
+    /// Sorts the slice in parallel.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable`](#method.par_sort_unstable).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5, 4, 1, -3, 2];
+    ///
+    /// v.par_sort();
+    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
+    /// ```
+    fn par_sort(&mut self)
+    where
+        T: Ord,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), T::lt);
+    }
+
+    /// Sorts the slice in parallel with a comparator function.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [5, 4, 1, 3, 2];
+    /// v.par_sort_by(|a, b| a.cmp(b));
+    /// assert_eq!(v, [1, 2, 3, 4, 5]);
+    ///
+    /// // reverse sorting
+    /// v.par_sort_by(|a, b| b.cmp(a));
+    /// assert_eq!(v, [5, 4, 3, 2, 1]);
+    /// ```
+    fn par_sort_by<F>(&mut self, compare: F)
+    where
+        F: Fn(&T, &T) -> Ordering + Sync,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
+            compare(a, b) == Ordering::Less
+        });
+    }
+
+    /// Sorts the slice in parallel with a key extraction function.
+    ///
+    /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+    ///
+    /// When applicable, unstable sorting is preferred because it is generally faster than stable
+    /// sorting and it doesn't allocate auxiliary memory.
+    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is an adaptive merge sort inspired by
+    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
+    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
+    /// two or more sorted sequences concatenated one after another.
+    ///
+    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
+    /// non-allocating insertion sort is used instead.
+    ///
+    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
+    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
+    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
+    /// parallel subdivision of chunks and parallel merge operation.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5i32, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_by_key(|k| k.abs());
+    /// assert_eq!(v, [1, 2, -3, 4, -5]);
+    /// ```
+    fn par_sort_by_key<B, F>(&mut self, f: F)
+    where
+        B: Ord,
+        F: Fn(&T) -> B + Sync,
+    {
+        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
+    }
+
+    /// Sorts the slice in parallel, but may not preserve the order of equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_unstable();
+    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
+    /// ```
+    fn par_sort_unstable(&mut self)
+    where
+        T: Ord,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), T::lt);
+    }
+
+    /// Sorts the slice in parallel with a comparator function, but may not preserve the order of
+    /// equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [5, 4, 1, 3, 2];
+    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
+    /// assert_eq!(v, [1, 2, 3, 4, 5]);
+    ///
+    /// // reverse sorting
+    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
+    /// assert_eq!(v, [5, 4, 3, 2, 1]);
+    /// ```
+    fn par_sort_unstable_by<F>(&mut self, compare: F)
+    where
+        F: Fn(&T, &T) -> Ordering + Sync,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
+            compare(a, b) == Ordering::Less
+        });
+    }
+
+    /// Sorts the slice in parallel with a key extraction function, but may not preserve the order
+    /// of equal elements.
+    ///
+    /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
+    /// and `O(n log n)` worst-case.
+    ///
+    /// # Current implementation
+    ///
+    /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
+    /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
+    /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
+    /// heapsort on degenerate inputs.
+    ///
+    /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+    /// slice consists of several concatenated sorted sequences.
+    ///
+    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+    /// parallel.
+    ///
+    /// [pdqsort]: https://github.com/orlp/pdqsort
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let mut v = [-5i32, 4, 1, -3, 2];
+    ///
+    /// v.par_sort_unstable_by_key(|k| k.abs());
+    /// assert_eq!(v, [1, 2, -3, 4, -5]);
+    /// ```
+    fn par_sort_unstable_by_key<B, F>(&mut self, f: F)
+    where
+        B: Ord,
+        F: Fn(&T) -> B + Sync,
+    {
+        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
+    }
+}
+
+impl<T: Send> ParallelSliceMut<T> for [T] {
+    #[inline]
+    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
+        self
+    }
+}
+
+impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
+    type Item = &'data T;
+    type Iter = Iter<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { slice: self }
+    }
+}
+
+impl<'data, T: Sync + 'data> IntoParallelIterator for &'data Vec<T> {
+    type Item = &'data T;
+    type Iter = Iter<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter { slice: self }
+    }
+}
+
+impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
+    type Item = &'data mut T;
+    type Iter = IterMut<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut { slice: self }
+    }
+}
+
+impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut Vec<T> {
+    type Item = &'data mut T;
+    type Iter = IterMut<'data, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut { slice: self }
+    }
+}
+
+/// Parallel iterator over immutable items in a slice
+#[derive(Debug)]
+pub struct Iter<'data, T: Sync> {
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Iter<'data, T> {
+    fn clone(&self) -> Self {
+        Iter { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
+    type Item = &'data T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(IterProducer { slice: self.slice })
+    }
+}
+
+struct IterProducer<'data, T: Sync> {
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
+    type Item = &'data T;
+    type IntoIter = ::std::slice::Iter<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.iter()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.slice.split_at(index);
+        (IterProducer { slice: left }, IterProducer { slice: right })
+    }
+}
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct Chunks<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Chunks<'data, T> {
+    fn clone(&self) -> Self {
+        Chunks { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Chunks<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Chunks<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.slice.len(), self.chunk_size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksProducer<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::Chunks<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+        let (left, right) = self.slice.split_at(elem_index);
+        (
+            ChunksProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExact<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+    rem: &'data [T],
+}
+
+impl<'data, T: Sync> ChunksExact<'data, T> {
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
+    pub fn remainder(&self) -> &'data [T] {
+        self.rem
+    }
+}
+
+impl<'data, T: Sync> Clone for ChunksExact<'data, T> {
+    fn clone(&self) -> Self {
+        ChunksExact { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for ChunksExact<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for ChunksExact<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len() / self.chunk_size
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksExactProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksExactProducer<'data, T: Sync> {
+    chunk_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksExactProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::ChunksExact<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_exact(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = index * self.chunk_size;
+        let (left, right) = self.slice.split_at(elem_index);
+        (
+            ChunksExactProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksExactProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over immutable overlapping windows of a slice
+#[derive(Debug)]
+pub struct Windows<'data, T: Sync> {
+    window_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: Sync> Clone for Windows<'data, T> {
+    fn clone(&self) -> Self {
+        Windows { ..*self }
+    }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        assert!(self.window_size >= 1);
+        self.slice.len().saturating_sub(self.window_size - 1)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(WindowsProducer {
+            window_size: self.window_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct WindowsProducer<'data, T: Sync> {
+    window_size: usize,
+    slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
+    type Item = &'data [T];
+    type IntoIter = ::std::slice::Windows<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.windows(self.window_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let left_index = cmp::min(self.slice.len(), index + (self.window_size - 1));
+        let left = &self.slice[..left_index];
+        let right = &self.slice[index..];
+        (
+            WindowsProducer {
+                window_size: self.window_size,
+                slice: left,
+            },
+            WindowsProducer {
+                window_size: self.window_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over mutable items in a slice
+#[derive(Debug)]
+pub struct IterMut<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
+    type Item = &'data mut T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(IterMutProducer { slice: self.slice })
+    }
+}
+
+struct IterMutProducer<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
+    type Item = &'data mut T;
+    type IntoIter = ::std::slice::IterMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.iter_mut()
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.slice.split_at_mut(index);
+        (
+            IterMutProducer { slice: left },
+            IterMutProducer { slice: right },
+        )
+    }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksMut<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksMut<'data, T> {
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        div_round_up(self.slice.len(), self.chunk_size)
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksMutProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksMutProducer<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksMutProducer<'data, T> {
+    type Item = &'data mut [T];
+    type IntoIter = ::std::slice::ChunksMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_mut(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+        let (left, right) = self.slice.split_at_mut(elem_index);
+        (
+            ChunksMutProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksMutProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExactMut<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+    rem: &'data mut [T],
+}
+
+impl<'data, T: Send> ChunksExactMut<'data, T> {
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
+    ///
+    /// Note that this has to consume `self` to return the original lifetime of
+    /// the data, which prevents this from actually being used as a parallel
+    /// iterator since that also consumes. This method is provided for parity
+    /// with `std::iter::ChunksExactMut`, but consider calling `remainder()` or
+    /// `take_remainder()` as alternatives.
+    pub fn into_remainder(self) -> &'data mut [T] {
+        self.rem
+    }
+
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements.
+    ///
+    /// Consider `take_remainder()` if you need access to the data with its
+    /// original lifetime, rather than borrowing through `&mut self` here.
+    pub fn remainder(&mut self) -> &mut [T] {
+        self.rem
+    }
+
+    /// Return the remainder of the original slice that is not going to be
+    /// returned by the iterator. The returned slice has at most `chunk_size-1`
+    /// elements. Subsequent calls will return an empty slice.
+    pub fn take_remainder(&mut self) -> &'data mut [T] {
+        std::mem::replace(&mut self.rem, &mut [])
+    }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksExactMut<'data, T> {
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksExactMut<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.slice.len() / self.chunk_size
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        callback.callback(ChunksExactMutProducer {
+            chunk_size: self.chunk_size,
+            slice: self.slice,
+        })
+    }
+}
+
+struct ChunksExactMutProducer<'data, T: Send> {
+    chunk_size: usize,
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksExactMutProducer<'data, T> {
+    type Item = &'data mut [T];
+    type IntoIter = ::std::slice::ChunksExactMut<'data, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.slice.chunks_exact_mut(self.chunk_size)
+    }
+
+    fn split_at(self, index: usize) -> (Self, Self) {
+        let elem_index = index * self.chunk_size;
+        let (left, right) = self.slice.split_at_mut(elem_index);
+        (
+            ChunksExactMutProducer {
+                chunk_size: self.chunk_size,
+                slice: left,
+            },
+            ChunksExactMutProducer {
+                chunk_size: self.chunk_size,
+                slice: right,
+            },
+        )
+    }
+}
+
+/// Parallel iterator over slices separated by a predicate
+pub struct Split<'data, T, P> {
+    slice: &'data [T],
+    separator: P,
+}
+
+impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
+    fn clone(&self) -> Self {
+        Split {
+            separator: self.separator.clone(),
+            ..*self
+        }
+    }
+}
+
+impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Split").field("slice", &self.slice).finish()
+    }
+}
+
+impl<'data, T, P> ParallelIterator for Split<'data, T, P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Sync,
+{
+    type Item = &'data [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.slice, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'data, T, P> Fissile<P> for &'data [T]
+where
+    P: Fn(&T) -> bool,
+{
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        end / 2
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        self[start..end].iter().position(separator)
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        self[..end].iter().rposition(separator)
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at(index);
+        (left, &right[1..]) // skip the separator
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send,
+    {
+        let mut split = self.split(separator);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+}
+
+/// Parallel iterator over mutable slices separated by a predicate
+pub struct SplitMut<'data, T, P> {
+    slice: &'data mut [T],
+    separator: P,
+}
+
+impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("SplitMut")
+            .field("slice", &self.slice)
+            .finish()
+    }
+}
+
+impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
+where
+    P: Fn(&T) -> bool + Sync + Send,
+    T: Send,
+{
+    type Item = &'data mut [T];
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.slice, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'data, T, P> Fissile<P> for &'data mut [T]
+where
+    P: Fn(&T) -> bool,
+{
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        end / 2
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        self[start..end].iter().position(separator)
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        self[..end].iter().rposition(separator)
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at_mut(index);
+        (left, &mut right[1..]) // skip the separator
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send,
+    {
+        let mut split = self.split_mut(separator);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+}
diff --git a/src/slice/quicksort.rs b/src/slice/quicksort.rs
new file mode 100644
index 0000000..b985073
--- /dev/null
+++ b/src/slice/quicksort.rs
@@ -0,0 +1,800 @@
+//! Parallel quicksort.
+//!
+//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
+//! The only difference from the original is that calls to `recurse` are executed in parallel using
+//! `rayon_core::join`.
+
+use std::cmp;
+use std::mem;
+use std::ptr;
+
+/// When dropped, takes the value out of `Option` and writes it into `dest`.
+///
+/// This allows us to safely read the pivot into a stack-allocated variable for efficiency, and
+/// write it back into the slice after partitioning. This way we ensure that the write happens
+/// even if `is_less` panics in the meantime.
+struct WriteOnDrop<T> {
+    value: Option<T>,
+    dest: *mut T,
+}
+
+impl<T> Drop for WriteOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::write(self.dest, self.value.take().unwrap());
+        }
+    }
+}
+
+/// Holds a value, but never drops it.
+struct NoDrop<T> {
+    value: Option<T>,
+}
+
+impl<T> Drop for NoDrop<T> {
+    fn drop(&mut self) {
+        mem::forget(self.value.take());
+    }
+}
+
+/// When dropped, copies from `src` into `dest`.
+struct CopyOnDrop<T> {
+    src: *mut T,
+    dest: *mut T,
+}
+
+impl<T> Drop for CopyOnDrop<T> {
+    fn drop(&mut self) {
+        unsafe {
+            ptr::copy_nonoverlapping(self.src, self.dest, 1);
+        }
+    }
+}
+
+/// Shifts the first element to the right until it encounters a greater or equal element.
+fn shift_head<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    unsafe {
+        // If the first two elements are out-of-order...
+        if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
+            // Read the first element into a stack-allocated variable. If a following comparison
+            // operation panics, `hole` will get dropped and automatically write the element back
+            // into the slice.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(v.get_unchecked(0))),
+            };
+            let mut hole = CopyOnDrop {
+                src: tmp.value.as_mut().unwrap(),
+                dest: v.get_unchecked_mut(1),
+            };
+            ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
+
+            for i in 2..len {
+                if !is_less(v.get_unchecked(i), tmp.value.as_ref().unwrap()) {
+                    break;
+                }
+
+                // Move `i`-th element one place to the left, thus shifting the hole to the right.
+                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
+                hole.dest = v.get_unchecked_mut(i);
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+}
+
+/// Shifts the last element to the left until it encounters a smaller or equal element.
+fn shift_tail<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let len = v.len();
+    unsafe {
+        // If the last two elements are out-of-order...
+        if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
+            // Read the last element into a stack-allocated variable. If a following comparison
+            // operation panics, `hole` will get dropped and automatically write the element back
+            // into the slice.
+            let mut tmp = NoDrop {
+                value: Some(ptr::read(v.get_unchecked(len - 1))),
+            };
+            let mut hole = CopyOnDrop {
+                src: tmp.value.as_mut().unwrap(),
+                dest: v.get_unchecked_mut(len - 2),
+            };
+            ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
+
+            for i in (0..len - 2).rev() {
+                if !is_less(&tmp.value.as_ref().unwrap(), v.get_unchecked(i)) {
+                    break;
+                }
+
+                // Move `i`-th element one place to the right, thus shifting the hole to the left.
+                ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
+                hole.dest = v.get_unchecked_mut(i);
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+}
+
+/// Partially sorts a slice by shifting several out-of-order elements around.
+///
+/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
+#[cold]
+fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &F) -> bool
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Maximum number of adjacent out-of-order pairs that will get shifted.
+    const MAX_STEPS: usize = 5;
+    // If the slice is shorter than this, don't shift any elements.
+    const SHORTEST_SHIFTING: usize = 50;
+
+    let len = v.len();
+    let mut i = 1;
+
+    for _ in 0..MAX_STEPS {
+        unsafe {
+            // Find the next pair of adjacent out-of-order elements.
+            while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
+                i += 1;
+            }
+        }
+
+        // Are we done?
+        if i == len {
+            return true;
+        }
+
+        // Don't shift elements on short arrays, that has a performance cost.
+        if len < SHORTEST_SHIFTING {
+            return false;
+        }
+
+        // Swap the found pair of elements. This puts them in correct order.
+        v.swap(i - 1, i);
+
+        // Shift the smaller element to the left.
+        shift_tail(&mut v[..i], is_less);
+        // Shift the greater element to the right.
+        shift_head(&mut v[i..], is_less);
+    }
+
+    // Didn't manage to sort the slice in the limited number of steps.
+    false
+}
+
+/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
+fn insertion_sort<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    for i in 1..v.len() {
+        shift_tail(&mut v[..=i], is_less);
+    }
+}
+
+/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
+#[cold]
+fn heapsort<T, F>(v: &mut [T], is_less: &F)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // This binary heap respects the invariant `parent >= child`.
+    let sift_down = |v: &mut [T], mut node| {
+        loop {
+            // Children of `node`:
+            let left = 2 * node + 1;
+            let right = 2 * node + 2;
+
+            // Choose the greater child.
+            let greater = if right < v.len() && is_less(&v[left], &v[right]) {
+                right
+            } else {
+                left
+            };
+
+            // Stop if the invariant holds at `node`.
+            if greater >= v.len() || !is_less(&v[node], &v[greater]) {
+                break;
+            }
+
+            // Swap `node` with the greater child, move one step down, and continue sifting.
+            v.swap(node, greater);
+            node = greater;
+        }
+    };
+
+    // Build the heap in linear time.
+    for i in (0..v.len() / 2).rev() {
+        sift_down(v, i);
+    }
+
+    // Pop maximal elements from the heap.
+    for i in (1..v.len()).rev() {
+        v.swap(0, i);
+        sift_down(&mut v[..i], 0);
+    }
+}
+
+/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
+/// to `pivot`.
+///
+/// Returns the number of elements smaller than `pivot`.
+///
+/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
+/// This idea is presented in the [BlockQuicksort][pdf] paper.
+///
+/// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
+fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &F) -> usize
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Number of elements in a typical block.
+    const BLOCK: usize = 128;
+
+    // The partitioning algorithm repeats the following steps until completion:
+    //
+    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
+    // 2. Trace a block from the right side to identify elements smaller than the pivot.
+    // 3. Exchange the identified elements between the left and right side.
+    //
+    // We keep the following variables for a block of elements:
+    //
+    // 1. `block` - Number of elements in the block.
+    // 2. `start` - Start pointer into the `offsets` array.
+    // 3. `end` - End pointer into the `offsets` array.
+    // 4. `offsets - Indices of out-of-order elements within the block.
+
+    // The current block on the left side (from `l` to `l.offset(block_l)`).
+    let mut l = v.as_mut_ptr();
+    let mut block_l = BLOCK;
+    let mut start_l = ptr::null_mut();
+    let mut end_l = ptr::null_mut();
+    let mut offsets_l = [0u8; BLOCK];
+
+    // The current block on the right side (from `r.offset(-block_r)` to `r`).
+    let mut r = unsafe { l.add(v.len()) };
+    let mut block_r = BLOCK;
+    let mut start_r = ptr::null_mut();
+    let mut end_r = ptr::null_mut();
+    let mut offsets_r = [0u8; BLOCK];
+
+    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
+    fn width<T>(l: *mut T, r: *mut T) -> usize {
+        assert!(mem::size_of::<T>() > 0);
+        (r as usize - l as usize) / mem::size_of::<T>()
+    }
+
+    loop {
+        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
+        // some patch-up work in order to partition the remaining elements in between.
+        let is_done = width(l, r) <= 2 * BLOCK;
+
+        if is_done {
+            // Number of remaining elements (still not compared to the pivot).
+            let mut rem = width(l, r);
+            if start_l < end_l || start_r < end_r {
+                rem -= BLOCK;
+            }
+
+            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
+            // aligned to cover the whole remaining gap.
+            if start_l < end_l {
+                block_r = rem;
+            } else if start_r < end_r {
+                block_l = rem;
+            } else {
+                block_l = rem / 2;
+                block_r = rem - block_l;
+            }
+            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
+            debug_assert!(width(l, r) == block_l + block_r);
+        }
+
+        if start_l == end_l {
+            // Trace `block_l` elements from the left side.
+            start_l = offsets_l.as_mut_ptr();
+            end_l = offsets_l.as_mut_ptr();
+            let mut elem = l;
+
+            for i in 0..block_l {
+                unsafe {
+                    // Branchless comparison.
+                    *end_l = i as u8;
+                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
+                    elem = elem.offset(1);
+                }
+            }
+        }
+
+        if start_r == end_r {
+            // Trace `block_r` elements from the right side.
+            start_r = offsets_r.as_mut_ptr();
+            end_r = offsets_r.as_mut_ptr();
+            let mut elem = r;
+
+            for i in 0..block_r {
+                unsafe {
+                    // Branchless comparison.
+                    elem = elem.offset(-1);
+                    *end_r = i as u8;
+                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
+                }
+            }
+        }
+
+        // Number of out-of-order elements to swap between the left and right side.
+        let count = cmp::min(width(start_l, end_l), width(start_r, end_r));
+
+        if count > 0 {
+            macro_rules! left {
+                () => {
+                    l.offset(*start_l as isize)
+                };
+            }
+            macro_rules! right {
+                () => {
+                    r.offset(-(*start_r as isize) - 1)
+                };
+            }
+
+            // Instead of swapping one pair at the time, it is more efficient to perform a cyclic
+            // permutation. This is not strictly equivalent to swapping, but produces a similar
+            // result using fewer memory operations.
+            unsafe {
+                let tmp = ptr::read(left!());
+                ptr::copy_nonoverlapping(right!(), left!(), 1);
+
+                for _ in 1..count {
+                    start_l = start_l.offset(1);
+                    ptr::copy_nonoverlapping(left!(), right!(), 1);
+                    start_r = start_r.offset(1);
+                    ptr::copy_nonoverlapping(right!(), left!(), 1);
+                }
+
+                ptr::copy_nonoverlapping(&tmp, right!(), 1);
+                mem::forget(tmp);
+                start_l = start_l.offset(1);
+                start_r = start_r.offset(1);
+            }
+        }
+
+        if start_l == end_l {
+            // All out-of-order elements in the left block were moved. Move to the next block.
+            l = unsafe { l.add(block_l) };
+        }
+
+        if start_r == end_r {
+            // All out-of-order elements in the right block were moved. Move to the previous block.
+            r = unsafe { r.sub(block_r) };
+        }
+
+        if is_done {
+            break;
+        }
+    }
+
+    // All that remains now is at most one block (either the left or the right) with out-of-order
+    // elements that need to be moved. Such remaining elements can be simply shifted to the end
+    // within their block.
+
+    if start_l < end_l {
+        // The left block remains.
+        // Move it's remaining out-of-order elements to the far right.
+        debug_assert_eq!(width(l, r), block_l);
+        while start_l < end_l {
+            unsafe {
+                end_l = end_l.offset(-1);
+                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
+                r = r.offset(-1);
+            }
+        }
+        width(v.as_mut_ptr(), r)
+    } else if start_r < end_r {
+        // The right block remains.
+        // Move it's remaining out-of-order elements to the far left.
+        debug_assert_eq!(width(l, r), block_r);
+        while start_r < end_r {
+            unsafe {
+                end_r = end_r.offset(-1);
+                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
+                l = l.offset(1);
+            }
+        }
+        width(v.as_mut_ptr(), l)
+    } else {
+        // Nothing else to do, we're done.
+        width(v.as_mut_ptr(), l)
+    }
+}
+
+/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
+/// equal to `v[pivot]`.
+///
+/// Returns a tuple of:
+///
+/// 1. Number of elements smaller than `v[pivot]`.
+/// 2. True if `v` was already partitioned.
+fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> (usize, bool)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let (mid, was_partitioned) = {
+        // Place the pivot at the beginning of slice.
+        v.swap(0, pivot);
+        let (pivot, v) = v.split_at_mut(1);
+        let pivot = &mut pivot[0];
+
+        // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
+        // operation panics, the pivot will be automatically written back into the slice.
+        let write_on_drop = WriteOnDrop {
+            value: unsafe { Some(ptr::read(pivot)) },
+            dest: pivot,
+        };
+        let pivot = write_on_drop.value.as_ref().unwrap();
+
+        // Find the first pair of out-of-order elements.
+        let mut l = 0;
+        let mut r = v.len();
+        unsafe {
+            // Find the first element greater then or equal to the pivot.
+            while l < r && is_less(v.get_unchecked(l), pivot) {
+                l += 1;
+            }
+
+            // Find the last element smaller that the pivot.
+            while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
+                r -= 1;
+            }
+        }
+
+        (
+            l + partition_in_blocks(&mut v[l..r], pivot, is_less),
+            l >= r,
+        )
+
+        // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated
+        // variable) back into the slice where it originally was. This step is critical in ensuring
+        // safety!
+    };
+
+    // Place the pivot between the two partitions.
+    v.swap(0, mid);
+
+    (mid, was_partitioned)
+}
+
+/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
+///
+/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
+/// elements smaller than the pivot.
+fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &F) -> usize
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Place the pivot at the beginning of slice.
+    v.swap(0, pivot);
+    let (pivot, v) = v.split_at_mut(1);
+    let pivot = &mut pivot[0];
+
+    // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
+    // operation panics, the pivot will be automatically written back into the slice.
+    let write_on_drop = WriteOnDrop {
+        value: unsafe { Some(ptr::read(pivot)) },
+        dest: pivot,
+    };
+    let pivot = write_on_drop.value.as_ref().unwrap();
+
+    // Now partition the slice.
+    let mut l = 0;
+    let mut r = v.len();
+    loop {
+        unsafe {
+            // Find the first element greater that the pivot.
+            while l < r && !is_less(pivot, v.get_unchecked(l)) {
+                l += 1;
+            }
+
+            // Find the last element equal to the pivot.
+            while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
+                r -= 1;
+            }
+
+            // Are we done?
+            if l >= r {
+                break;
+            }
+
+            // Swap the found pair of out-of-order elements.
+            r -= 1;
+            ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
+            l += 1;
+        }
+    }
+
+    // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
+    l + 1
+
+    // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated variable)
+    // back into the slice where it originally was. This step is critical in ensuring safety!
+}
+
+/// Scatters some elements around in an attempt to break patterns that might cause imbalanced
+/// partitions in quicksort.
+#[cold]
+fn break_patterns<T>(v: &mut [T]) {
+    let len = v.len();
+    if len >= 8 {
+        // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia.
+        let mut random = len as u32;
+        let mut gen_u32 = || {
+            random ^= random << 13;
+            random ^= random >> 17;
+            random ^= random << 5;
+            random
+        };
+        let mut gen_usize = || {
+            if mem::size_of::<usize>() <= 4 {
+                gen_u32() as usize
+            } else {
+                ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize
+            }
+        };
+
+        // Take random numbers modulo this number.
+        // The number fits into `usize` because `len` is not greater than `isize::MAX`.
+        let modulus = len.next_power_of_two();
+
+        // Some pivot candidates will be in the nearby of this index. Let's randomize them.
+        let pos = len / 4 * 2;
+
+        for i in 0..3 {
+            // Generate a random number modulo `len`. However, in order to avoid costly operations
+            // we first take it modulo a power of two, and then decrease by `len` until it fits
+            // into the range `[0, len - 1]`.
+            let mut other = gen_usize() & (modulus - 1);
+
+            // `other` is guaranteed to be less than `2 * len`.
+            if other >= len {
+                other -= len;
+            }
+
+            v.swap(pos - 1 + i, other);
+        }
+    }
+}
+
+/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
+///
+/// Elements in `v` might be reordered in the process.
+fn choose_pivot<T, F>(v: &mut [T], is_less: &F) -> (usize, bool)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    // Minimum length to choose the median-of-medians method.
+    // Shorter slices use the simple median-of-three method.
+    const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
+    // Maximum number of swaps that can be performed in this function.
+    const MAX_SWAPS: usize = 4 * 3;
+
+    let len = v.len();
+
+    // Three indices near which we are going to choose a pivot.
+    let mut a = len / 4 * 1;
+    let mut b = len / 4 * 2;
+    let mut c = len / 4 * 3;
+
+    // Counts the total number of swaps we are about to perform while sorting indices.
+    let mut swaps = 0;
+
+    if len >= 8 {
+        // Swaps indices so that `v[a] <= v[b]`.
+        let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
+            if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
+                ptr::swap(a, b);
+                swaps += 1;
+            }
+        };
+
+        // Swaps indices so that `v[a] <= v[b] <= v[c]`.
+        let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
+            sort2(a, b);
+            sort2(b, c);
+            sort2(a, b);
+        };
+
+        if len >= SHORTEST_MEDIAN_OF_MEDIANS {
+            // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
+            let mut sort_adjacent = |a: &mut usize| {
+                let tmp = *a;
+                sort3(&mut (tmp - 1), a, &mut (tmp + 1));
+            };
+
+            // Find medians in the neighborhoods of `a`, `b`, and `c`.
+            sort_adjacent(&mut a);
+            sort_adjacent(&mut b);
+            sort_adjacent(&mut c);
+        }
+
+        // Find the median among `a`, `b`, and `c`.
+        sort3(&mut a, &mut b, &mut c);
+    }
+
+    if swaps < MAX_SWAPS {
+        (b, swaps == 0)
+    } else {
+        // The maximum number of swaps was performed. Chances are the slice is descending or mostly
+        // descending, so reversing will probably help sort it faster.
+        v.reverse();
+        (len - 1 - b, true)
+    }
+}
+
+/// Sorts `v` recursively.
+///
+/// If the slice had a predecessor in the original array, it is specified as `pred`.
+///
+/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
+/// this function will immediately switch to heapsort.
+fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: usize)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Slices of up to this length get sorted using insertion sort.
+    const MAX_INSERTION: usize = 20;
+    // If both partitions are up to this length, we continue sequentially. This number is as small
+    // as possible but so that the overhead of Rayon's task scheduling is still negligible.
+    const MAX_SEQUENTIAL: usize = 2000;
+
+    // True if the last partitioning was reasonably balanced.
+    let mut was_balanced = true;
+    // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
+    let mut was_partitioned = true;
+
+    loop {
+        let len = v.len();
+
+        // Very short slices get sorted using insertion sort.
+        if len <= MAX_INSERTION {
+            insertion_sort(v, is_less);
+            return;
+        }
+
+        // If too many bad pivot choices were made, simply fall back to heapsort in order to
+        // guarantee `O(n log n)` worst-case.
+        if limit == 0 {
+            heapsort(v, is_less);
+            return;
+        }
+
+        // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
+        // some elements around. Hopefully we'll choose a better pivot this time.
+        if !was_balanced {
+            break_patterns(v);
+            limit -= 1;
+        }
+
+        // Choose a pivot and try guessing whether the slice is already sorted.
+        let (pivot, likely_sorted) = choose_pivot(v, is_less);
+
+        // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
+        // selection predicts the slice is likely already sorted...
+        if was_balanced && was_partitioned && likely_sorted {
+            // Try identifying several out-of-order elements and shifting them to correct
+            // positions. If the slice ends up being completely sorted, we're done.
+            if partial_insertion_sort(v, is_less) {
+                return;
+            }
+        }
+
+        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
+        // slice. Partition the slice into elements equal to and elements greater than the pivot.
+        // This case is usually hit when the slice contains many duplicate elements.
+        if let Some(ref p) = pred {
+            if !is_less(p, &v[pivot]) {
+                let mid = partition_equal(v, pivot, is_less);
+
+                // Continue sorting elements greater than the pivot.
+                v = &mut { v }[mid..];
+                continue;
+            }
+        }
+
+        // Partition the slice.
+        let (mid, was_p) = partition(v, pivot, is_less);
+        was_balanced = cmp::min(mid, len - mid) >= len / 8;
+        was_partitioned = was_p;
+
+        // Split the slice into `left`, `pivot`, and `right`.
+        let (left, right) = { v }.split_at_mut(mid);
+        let (pivot, right) = right.split_at_mut(1);
+        let pivot = &mut pivot[0];
+
+        if cmp::max(left.len(), right.len()) <= MAX_SEQUENTIAL {
+            // Recurse into the shorter side only in order to minimize the total number of recursive
+            // calls and consume less stack space. Then just continue with the longer side (this is
+            // akin to tail recursion).
+            if left.len() < right.len() {
+                recurse(left, is_less, pred, limit);
+                v = right;
+                pred = Some(pivot);
+            } else {
+                recurse(right, is_less, Some(pivot), limit);
+                v = left;
+            }
+        } else {
+            // Sort the left and right half in parallel.
+            rayon_core::join(
+                || recurse(left, is_less, pred, limit),
+                || recurse(right, is_less, Some(pivot), limit),
+            );
+            break;
+        }
+    }
+}
+
+/// Sorts `v` using pattern-defeating quicksort in parallel.
+///
+/// The algorithm is unstable, in-place, and `O(n log n)` worst-case.
+pub(super) fn par_quicksort<T, F>(v: &mut [T], is_less: F)
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Sorting has no meaningful behavior on zero-sized types.
+    if mem::size_of::<T>() == 0 {
+        return;
+    }
+
+    // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
+    let limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
+
+    recurse(v, &is_less, None, limit);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::heapsort;
+    use rand::distributions::Uniform;
+    use rand::{thread_rng, Rng};
+
+    #[test]
+    fn test_heapsort() {
+        let rng = thread_rng();
+
+        for len in (0..25).chain(500..501) {
+            for &modulus in &[5, 10, 100] {
+                let dist = Uniform::new(0, modulus);
+                for _ in 0..100 {
+                    let v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    // Test heapsort using `<` operator.
+                    let mut tmp = v.clone();
+                    heapsort(&mut tmp, &|a, b| a < b);
+                    assert!(tmp.windows(2).all(|w| w[0] <= w[1]));
+
+                    // Test heapsort using `>` operator.
+                    let mut tmp = v.clone();
+                    heapsort(&mut tmp, &|a, b| a > b);
+                    assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
+                }
+            }
+        }
+
+        // Sort using a completely random comparison function.
+        // This will reorder the elements *somehow*, but won't panic.
+        let mut v: Vec<_> = (0..100).collect();
+        heapsort(&mut v, &|_, _| thread_rng().gen());
+        heapsort(&mut v, &|a, b| a < b);
+
+        for i in 0..v.len() {
+            assert_eq!(v[i], i);
+        }
+    }
+}
diff --git a/src/slice/test.rs b/src/slice/test.rs
new file mode 100644
index 0000000..97de7d8
--- /dev/null
+++ b/src/slice/test.rs
@@ -0,0 +1,148 @@
+#![cfg(test)]
+
+use crate::prelude::*;
+use rand::distributions::Uniform;
+use rand::seq::SliceRandom;
+use rand::{thread_rng, Rng};
+use std::cmp::Ordering::{Equal, Greater, Less};
+
+macro_rules! sort {
+    ($f:ident, $name:ident) => {
+        #[test]
+        fn $name() {
+            let mut rng = thread_rng();
+
+            for len in (0..25).chain(500..501) {
+                for &modulus in &[5, 10, 100] {
+                    let dist = Uniform::new(0, modulus);
+                    for _ in 0..100 {
+                        let v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                        // Test sort using `<` operator.
+                        let mut tmp = v.clone();
+                        tmp.$f(|a, b| a.cmp(b));
+                        assert!(tmp.windows(2).all(|w| w[0] <= w[1]));
+
+                        // Test sort using `>` operator.
+                        let mut tmp = v.clone();
+                        tmp.$f(|a, b| b.cmp(a));
+                        assert!(tmp.windows(2).all(|w| w[0] >= w[1]));
+                    }
+                }
+            }
+
+            // Test sort with many duplicates.
+            for &len in &[1_000, 10_000, 100_000] {
+                for &modulus in &[5, 10, 100, 10_000] {
+                    let dist = Uniform::new(0, modulus);
+                    let mut v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    v.$f(|a, b| a.cmp(b));
+                    assert!(v.windows(2).all(|w| w[0] <= w[1]));
+                }
+            }
+
+            // Test sort with many pre-sorted runs.
+            for &len in &[1_000, 10_000, 100_000] {
+                let len_dist = Uniform::new(0, len);
+                for &modulus in &[5, 10, 1000, 50_000] {
+                    let dist = Uniform::new(0, modulus);
+                    let mut v: Vec<i32> = rng.sample_iter(&dist).take(len).collect();
+
+                    v.sort();
+                    v.reverse();
+
+                    for _ in 0..5 {
+                        let a = rng.sample(&len_dist);
+                        let b = rng.sample(&len_dist);
+                        if a < b {
+                            v[a..b].reverse();
+                        } else {
+                            v.swap(a, b);
+                        }
+                    }
+
+                    v.$f(|a, b| a.cmp(b));
+                    assert!(v.windows(2).all(|w| w[0] <= w[1]));
+                }
+            }
+
+            // Sort using a completely random comparison function.
+            // This will reorder the elements *somehow*, but won't panic.
+            let mut v: Vec<_> = (0..100).collect();
+            v.$f(|_, _| *[Less, Equal, Greater].choose(&mut thread_rng()).unwrap());
+            v.$f(|a, b| a.cmp(b));
+            for i in 0..v.len() {
+                assert_eq!(v[i], i);
+            }
+
+            // Should not panic.
+            [0i32; 0].$f(|a, b| a.cmp(b));
+            [(); 10].$f(|a, b| a.cmp(b));
+            [(); 100].$f(|a, b| a.cmp(b));
+
+            let mut v = [0xDEAD_BEEFu64];
+            v.$f(|a, b| a.cmp(b));
+            assert!(v == [0xDEAD_BEEF]);
+        }
+    };
+}
+
+sort!(par_sort_by, test_par_sort);
+sort!(par_sort_unstable_by, test_par_sort_unstable);
+
+#[test]
+fn test_par_sort_stability() {
+    for len in (2..25).chain(500..510).chain(50_000..50_010) {
+        for _ in 0..10 {
+            let mut counts = [0; 10];
+
+            // Create a vector like [(6, 1), (5, 1), (6, 2), ...],
+            // where the first item of each tuple is random, but
+            // the second item represents which occurrence of that
+            // number this element is, i.e. the second elements
+            // will occur in sorted order.
+            let mut rng = thread_rng();
+            let mut v: Vec<_> = (0..len)
+                .map(|_| {
+                    let n: usize = rng.gen_range(0, 10);
+                    counts[n] += 1;
+                    (n, counts[n])
+                })
+                .collect();
+
+            // Only sort on the first element, so an unstable sort
+            // may mix up the counts.
+            v.par_sort_by(|&(a, _), &(b, _)| a.cmp(&b));
+
+            // This comparison includes the count (the second item
+            // of the tuple), so elements with equal first items
+            // will need to be ordered with increasing
+            // counts... i.e. exactly asserting that this sort is
+            // stable.
+            assert!(v.windows(2).all(|w| w[0] <= w[1]));
+        }
+    }
+}
+
+#[test]
+fn test_par_chunks_exact_remainder() {
+    let v: &[i32] = &[0, 1, 2, 3, 4];
+    let c = v.par_chunks_exact(2);
+    assert_eq!(c.remainder(), &[4]);
+    assert_eq!(c.len(), 2);
+}
+
+#[test]
+fn test_par_chunks_exact_mut_remainder() {
+    let v: &mut [i32] = &mut [0, 1, 2, 3, 4];
+    let mut c = v.par_chunks_exact_mut(2);
+    assert_eq!(c.remainder(), &[4]);
+    assert_eq!(c.len(), 2);
+    assert_eq!(c.into_remainder(), &[4]);
+
+    let mut c = v.par_chunks_exact_mut(2);
+    assert_eq!(c.take_remainder(), &[4]);
+    assert_eq!(c.take_remainder(), &[]);
+    assert_eq!(c.len(), 2);
+}
diff --git a/src/split_producer.rs b/src/split_producer.rs
new file mode 100644
index 0000000..568657a
--- /dev/null
+++ b/src/split_producer.rs
@@ -0,0 +1,132 @@
+//! Common splitter for strings and slices
+//!
+//! This module is private, so these items are effectively `pub(super)`
+
+use crate::iter::plumbing::{Folder, UnindexedProducer};
+
+/// Common producer for splitting on a predicate.
+pub(super) struct SplitProducer<'p, P, V> {
+    data: V,
+    separator: &'p P,
+
+    /// Marks the endpoint beyond which we've already found no separators.
+    tail: usize,
+}
+
+/// Helper trait so `&str`, `&[T]`, and `&mut [T]` can share `SplitProducer`.
+pub(super) trait Fissile<P>: Sized {
+    fn length(&self) -> usize;
+    fn midpoint(&self, end: usize) -> usize;
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize>;
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize>;
+    fn split_once(self, index: usize) -> (Self, Self);
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+        Self: Send;
+}
+
+impl<'p, P, V> SplitProducer<'p, P, V>
+where
+    V: Fissile<P> + Send,
+{
+    pub(super) fn new(data: V, separator: &'p P) -> Self {
+        SplitProducer {
+            tail: data.length(),
+            data,
+            separator,
+        }
+    }
+
+    /// Common `fold_with` implementation, integrating `SplitTerminator`'s
+    /// need to sometimes skip its final empty item.
+    pub(super) fn fold_with<F>(self, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<V>,
+    {
+        let SplitProducer {
+            data,
+            separator,
+            tail,
+        } = self;
+
+        if tail == data.length() {
+            // No tail section, so just let `fold_splits` handle it.
+            data.fold_splits(separator, folder, skip_last)
+        } else if let Some(index) = data.rfind(separator, tail) {
+            // We found the last separator to complete the tail, so
+            // end with that slice after `fold_splits` finds the rest.
+            let (left, right) = data.split_once(index);
+            let folder = left.fold_splits(separator, folder, false);
+            if skip_last || folder.full() {
+                folder
+            } else {
+                folder.consume(right)
+            }
+        } else {
+            // We know there are no separators at all.  Return our whole data.
+            if skip_last {
+                folder
+            } else {
+                folder.consume(data)
+            }
+        }
+    }
+}
+
+impl<'p, P, V> UnindexedProducer for SplitProducer<'p, P, V>
+where
+    V: Fissile<P> + Send,
+    P: Sync,
+{
+    type Item = V;
+
+    fn split(self) -> (Self, Option<Self>) {
+        // Look forward for the separator, and failing that look backward.
+        let mid = self.data.midpoint(self.tail);
+        let index = match self.data.find(self.separator, mid, self.tail) {
+            Some(i) => Some(mid + i),
+            None => self.data.rfind(self.separator, mid),
+        };
+
+        if let Some(index) = index {
+            let len = self.data.length();
+            let (left, right) = self.data.split_once(index);
+
+            let (left_tail, right_tail) = if index < mid {
+                // If we scanned backwards to find the separator, everything in
+                // the right side is exhausted, with no separators left to find.
+                (index, 0)
+            } else {
+                let right_index = len - right.length();
+                (mid, self.tail - right_index)
+            };
+
+            // Create the left split before the separator.
+            let left = SplitProducer {
+                data: left,
+                tail: left_tail,
+                ..self
+            };
+
+            // Create the right split following the separator.
+            let right = SplitProducer {
+                data: right,
+                tail: right_tail,
+                ..self
+            };
+
+            (left, Some(right))
+        } else {
+            // The search is exhausted, no more separators...
+            (SplitProducer { tail: 0, ..self }, None)
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.fold_with(folder, false)
+    }
+}
diff --git a/src/str.rs b/src/str.rs
new file mode 100644
index 0000000..2fdaaa7
--- /dev/null
+++ b/src/str.rs
@@ -0,0 +1,874 @@
+//! Parallel iterator types for [strings][std::str]
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
+//! reference a `Pattern` trait which is not visible outside this crate.
+//! This trait is intentionally kept private, for use only by Rayon itself.
+//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+//!
+//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
+//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
+//!
+//! [std::str]: https://doc.rust-lang.org/stable/std/str/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::split_producer::*;
+
+/// Test if a byte is the start of a UTF-8 character.
+/// (extracted from `str::is_char_boundary`)
+#[inline]
+fn is_char_boundary(b: u8) -> bool {
+    // This is bit magic equivalent to: b < 128 || b >= 192
+    (b as i8) >= -0x40
+}
+
+/// Find the index of a character boundary near the midpoint.
+#[inline]
+fn find_char_midpoint(chars: &str) -> usize {
+    let mid = chars.len() / 2;
+
+    // We want to split near the midpoint, but we need to find an actual
+    // character boundary.  So we look at the raw bytes, first scanning
+    // forward from the midpoint for a boundary, then trying backward.
+    let (left, right) = chars.as_bytes().split_at(mid);
+    match right.iter().cloned().position(is_char_boundary) {
+        Some(i) => mid + i,
+        None => left
+            .iter()
+            .cloned()
+            .rposition(is_char_boundary)
+            .unwrap_or(0),
+    }
+}
+
+/// Try to split a string near the midpoint.
+#[inline]
+fn split(chars: &str) -> Option<(&str, &str)> {
+    let index = find_char_midpoint(chars);
+    if index > 0 {
+        Some(chars.split_at(index))
+    } else {
+        None
+    }
+}
+
+/// Parallel extensions for strings.
+pub trait ParallelString {
+    /// Returns a plain string slice, which is used to implement the rest of
+    /// the parallel methods.
+    fn as_parallel_string(&self) -> &str;
+
+    /// Returns a parallel iterator over the characters of a string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
+    /// assert_eq!(Some('o'), max);
+    /// ```
+    fn par_chars(&self) -> Chars<'_> {
+        Chars {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over the characters of a string, with their positions.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
+    /// assert_eq!(Some((1, 'e')), min);
+    /// ```
+    fn par_char_indices(&self) -> CharIndices<'_> {
+        CharIndices {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over the bytes of a string.
+    ///
+    /// Note that multi-byte sequences (for code points greater than `U+007F`)
+    /// are produced as separate items, but will not be split across threads.
+    /// If you would prefer an indexed iterator without that guarantee, consider
+    /// `string.as_bytes().par_iter().cloned()` instead.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let max = "hello".par_bytes().max();
+    /// assert_eq!(Some(b'o'), max);
+    /// ```
+    fn par_bytes(&self) -> Bytes<'_> {
+        Bytes {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over a string encoded as UTF-16.
+    ///
+    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
+    /// produced as separate items, but will not be split across threads.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    ///
+    /// let max = "hello".par_encode_utf16().max();
+    /// assert_eq!(Some(b'o' as u16), max);
+    ///
+    /// let text = "Zażółć gęślą jaźń";
+    /// let utf8_len = text.len();
+    /// let utf16_len = text.par_encode_utf16().count();
+    /// assert!(utf16_len <= utf8_len);
+    /// ```
+    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
+        EncodeUtf16 {
+            chars: self.as_parallel_string(),
+        }
+    }
+
+    /// Returns a parallel iterator over substrings separated by a
+    /// given character or predicate, similar to `str::split`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let total = "1, 2, buckle, 3, 4, door"
+    ///    .par_split(',')
+    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
+    ///    .sum();
+    /// assert_eq!(10, total);
+    /// ```
+    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
+        Split::new(self.as_parallel_string(), separator)
+    }
+
+    /// Returns a parallel iterator over substrings terminated by a
+    /// given character or predicate, similar to `str::split_terminator`.
+    /// It's equivalent to `par_split`, except it doesn't produce an empty
+    /// substring after a trailing terminator.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let parts: Vec<_> = "((1 + 3) * 2)"
+    ///     .par_split_terminator(|c| c == '(' || c == ')')
+    ///     .collect();
+    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
+    /// ```
+    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
+        SplitTerminator::new(self.as_parallel_string(), terminator)
+    }
+
+    /// Returns a parallel iterator over the lines of a string, ending with an
+    /// optional carriage return and with a newline (`\r\n` or just `\n`).
+    /// The final line ending is optional, and line endings are not included in
+    /// the output strings.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let lengths: Vec<_> = "hello world\nfizbuzz"
+    ///     .par_lines()
+    ///     .map(|l| l.len())
+    ///     .collect();
+    /// assert_eq!(vec![11, 7], lengths);
+    /// ```
+    fn par_lines(&self) -> Lines<'_> {
+        Lines(self.as_parallel_string())
+    }
+
+    /// Returns a parallel iterator over the sub-slices of a string that are
+    /// separated by any amount of whitespace.
+    ///
+    /// As with `str::split_whitespace`, 'whitespace' is defined according to
+    /// the terms of the Unicode Derived Core Property `White_Space`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let longest = "which is the longest word?"
+    ///     .par_split_whitespace()
+    ///     .max_by_key(|word| word.len());
+    /// assert_eq!(Some("longest"), longest);
+    /// ```
+    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
+        SplitWhitespace(self.as_parallel_string())
+    }
+
+    /// Returns a parallel iterator over substrings that match a
+    /// given character or predicate, similar to `str::matches`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let total = "1, 2, buckle, 3, 4, door"
+    ///    .par_matches(char::is_numeric)
+    ///    .map(|s| s.parse::<i32>().expect("digit"))
+    ///    .sum();
+    /// assert_eq!(10, total);
+    /// ```
+    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
+        Matches {
+            chars: self.as_parallel_string(),
+            pattern,
+        }
+    }
+
+    /// Returns a parallel iterator over substrings that match a given character
+    /// or predicate, with their positions, similar to `str::match_indices`.
+    ///
+    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
+    /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rayon::prelude::*;
+    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
+    ///    .par_match_indices(char::is_numeric)
+    ///    .collect();
+    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
+    /// ```
+    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
+        MatchIndices {
+            chars: self.as_parallel_string(),
+            pattern,
+        }
+    }
+}
+
+impl ParallelString for str {
+    #[inline]
+    fn as_parallel_string(&self) -> &str {
+        self
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// We hide the `Pattern` trait in a private module, as its API is not meant
+/// for general consumption.  If we could have privacy on trait items, then it
+/// would be nicer to have its basic existence and implementors public while
+/// keeping all of the methods private.
+mod private {
+    use crate::iter::plumbing::Folder;
+
+    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
+    /// `std::str::pattern::{Pattern, Searcher}`.
+    ///
+    /// Implementing this trait is not permitted outside of `rayon`.
+    pub trait Pattern: Sized + Sync + Send {
+        private_decl! {}
+        fn find_in(&self, haystack: &str) -> Option<usize>;
+        fn rfind_in(&self, haystack: &str) -> Option<usize>;
+        fn is_suffix_of(&self, haystack: &str) -> bool;
+        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
+        where
+            F: Folder<&'ch str>;
+        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
+        where
+            F: Folder<&'ch str>;
+        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
+        where
+            F: Folder<(usize, &'ch str)>;
+    }
+}
+use self::private::Pattern;
+
+#[inline]
+fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
+    move |(i, x)| (base + i, x)
+}
+
+impl Pattern for char {
+    private_impl! {}
+
+    #[inline]
+    fn find_in(&self, chars: &str) -> Option<usize> {
+        chars.find(*self)
+    }
+
+    #[inline]
+    fn rfind_in(&self, chars: &str) -> Option<usize> {
+        chars.rfind(*self)
+    }
+
+    #[inline]
+    fn is_suffix_of(&self, chars: &str) -> bool {
+        chars.ends_with(*self)
+    }
+
+    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        let mut split = chars.split(*self);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+
+    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        folder.consume_iter(chars.matches(*self))
+    }
+
+    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
+    where
+        F: Folder<(usize, &'ch str)>,
+    {
+        folder.consume_iter(chars.match_indices(*self).map(offset(base)))
+    }
+}
+
+impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
+    private_impl! {}
+
+    fn find_in(&self, chars: &str) -> Option<usize> {
+        chars.find(self)
+    }
+
+    fn rfind_in(&self, chars: &str) -> Option<usize> {
+        chars.rfind(self)
+    }
+
+    fn is_suffix_of(&self, chars: &str) -> bool {
+        chars.ends_with(self)
+    }
+
+    fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        let mut split = chars.split(self);
+        if skip_last {
+            split.next_back();
+        }
+        folder.consume_iter(split)
+    }
+
+    fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
+    where
+        F: Folder<&'ch str>,
+    {
+        folder.consume_iter(chars.matches(self))
+    }
+
+    fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
+    where
+        F: Folder<(usize, &'ch str)>,
+    {
+        folder.consume_iter(chars.match_indices(self).map(offset(base)))
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the characters of a string
+#[derive(Debug, Clone)]
+pub struct Chars<'ch> {
+    chars: &'ch str,
+}
+
+struct CharsProducer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for Chars<'ch> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for CharsProducer<'ch> {
+    type Item = char;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                CharsProducer { chars: left },
+                Some(CharsProducer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.chars())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the characters of a string, with their positions
+#[derive(Debug, Clone)]
+pub struct CharIndices<'ch> {
+    chars: &'ch str,
+}
+
+struct CharIndicesProducer<'ch> {
+    index: usize,
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for CharIndices<'ch> {
+    type Item = (usize, char);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = CharIndicesProducer {
+            index: 0,
+            chars: self.chars,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
+    type Item = (usize, char);
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                CharIndicesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(CharIndicesProducer {
+                    chars: right,
+                    index: self.index + left.len(),
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        let base = self.index;
+        folder.consume_iter(self.chars.char_indices().map(offset(base)))
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over the bytes of a string
+#[derive(Debug, Clone)]
+pub struct Bytes<'ch> {
+    chars: &'ch str,
+}
+
+struct BytesProducer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for Bytes<'ch> {
+    type Item = u8;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for BytesProducer<'ch> {
+    type Item = u8;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                BytesProducer { chars: left },
+                Some(BytesProducer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.bytes())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over a string encoded as UTF-16
+#[derive(Debug, Clone)]
+pub struct EncodeUtf16<'ch> {
+    chars: &'ch str,
+}
+
+struct EncodeUtf16Producer<'ch> {
+    chars: &'ch str,
+}
+
+impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
+    type Item = u16;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
+    }
+}
+
+impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
+    type Item = u16;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                EncodeUtf16Producer { chars: left },
+                Some(EncodeUtf16Producer { chars: right }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        folder.consume_iter(self.chars.encode_utf16())
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by a pattern
+#[derive(Debug, Clone)]
+pub struct Split<'ch, P: Pattern> {
+    chars: &'ch str,
+    separator: P,
+}
+
+impl<'ch, P: Pattern> Split<'ch, P> {
+    fn new(chars: &'ch str, separator: P) -> Self {
+        Split { chars, separator }
+    }
+}
+
+impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitProducer::new(self.chars, &self.separator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+/// Implement support for `SplitProducer`.
+impl<'ch, P: Pattern> Fissile<P> for &'ch str {
+    fn length(&self) -> usize {
+        self.len()
+    }
+
+    fn midpoint(&self, end: usize) -> usize {
+        // First find a suitable UTF-8 boundary.
+        find_char_midpoint(&self[..end])
+    }
+
+    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
+        separator.find_in(&self[start..end])
+    }
+
+    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
+        separator.rfind_in(&self[..end])
+    }
+
+    fn split_once(self, index: usize) -> (Self, Self) {
+        let (left, right) = self.split_at(index);
+        let mut right_iter = right.chars();
+        right_iter.next(); // skip the separator
+        (left, right_iter.as_str())
+    }
+
+    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
+    where
+        F: Folder<Self>,
+    {
+        separator.fold_splits(self, folder, skip_last)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by a terminator pattern
+#[derive(Debug, Clone)]
+pub struct SplitTerminator<'ch, P: Pattern> {
+    chars: &'ch str,
+    terminator: P,
+}
+
+struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
+    splitter: SplitProducer<'sep, P, &'ch str>,
+    skip_last: bool,
+}
+
+impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
+    fn new(chars: &'ch str, terminator: P) -> Self {
+        SplitTerminator { chars, terminator }
+    }
+}
+
+impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
+    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
+        SplitTerminatorProducer {
+            splitter: SplitProducer::new(chars, terminator),
+            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
+        }
+    }
+}
+
+impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
+    type Item = &'ch str;
+
+    fn split(mut self) -> (Self, Option<Self>) {
+        let (left, right) = self.splitter.split();
+        self.splitter = left;
+        let right = right.map(|right| {
+            let skip_last = self.skip_last;
+            self.skip_last = false;
+            SplitTerminatorProducer {
+                splitter: right,
+                skip_last,
+            }
+        });
+        (self, right)
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.splitter.fold_with(folder, self.skip_last)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over lines in a string
+#[derive(Debug, Clone)]
+pub struct Lines<'ch>(&'ch str);
+
+#[inline]
+fn no_carriage_return(line: &str) -> &str {
+    if line.ends_with('\r') {
+        &line[..line.len() - 1]
+    } else {
+        line
+    }
+}
+
+impl<'ch> ParallelIterator for Lines<'ch> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0
+            .par_split_terminator('\n')
+            .map(no_carriage_return)
+            .drive_unindexed(consumer)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings separated by whitespace
+#[derive(Debug, Clone)]
+pub struct SplitWhitespace<'ch>(&'ch str);
+
+#[inline]
+fn not_empty(s: &&str) -> bool {
+    !s.is_empty()
+}
+
+impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.0
+            .par_split(char::is_whitespace)
+            .filter(not_empty)
+            .drive_unindexed(consumer)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings that match a pattern
+#[derive(Debug, Clone)]
+pub struct Matches<'ch, P: Pattern> {
+    chars: &'ch str,
+    pattern: P,
+}
+
+struct MatchesProducer<'ch, 'pat, P: Pattern> {
+    chars: &'ch str,
+    pattern: &'pat P,
+}
+
+impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
+    type Item = &'ch str;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = MatchesProducer {
+            chars: self.chars,
+            pattern: &self.pattern,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
+    type Item = &'ch str;
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                MatchesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(MatchesProducer {
+                    chars: right,
+                    ..self
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.pattern.fold_matches(self.chars, folder)
+    }
+}
+
+// /////////////////////////////////////////////////////////////////////////
+
+/// Parallel iterator over substrings that match a pattern, with their positions
+#[derive(Debug, Clone)]
+pub struct MatchIndices<'ch, P: Pattern> {
+    chars: &'ch str,
+    pattern: P,
+}
+
+struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
+    index: usize,
+    chars: &'ch str,
+    pattern: &'pat P,
+}
+
+impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
+    type Item = (usize, &'ch str);
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        let producer = MatchIndicesProducer {
+            index: 0,
+            chars: self.chars,
+            pattern: &self.pattern,
+        };
+        bridge_unindexed(producer, consumer)
+    }
+}
+
+impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
+    type Item = (usize, &'ch str);
+
+    fn split(self) -> (Self, Option<Self>) {
+        match split(self.chars) {
+            Some((left, right)) => (
+                MatchIndicesProducer {
+                    chars: left,
+                    ..self
+                },
+                Some(MatchIndicesProducer {
+                    chars: right,
+                    index: self.index + left.len(),
+                    ..self
+                }),
+            ),
+            None => (self, None),
+        }
+    }
+
+    fn fold_with<F>(self, folder: F) -> F
+    where
+        F: Folder<Self::Item>,
+    {
+        self.pattern
+            .fold_match_indices(self.chars, folder, self.index)
+    }
+}
diff --git a/src/string.rs b/src/string.rs
new file mode 100644
index 0000000..91e69f9
--- /dev/null
+++ b/src/string.rs
@@ -0,0 +1,48 @@
+//! This module contains the parallel iterator types for owned strings
+//! (`String`). You will rarely need to interact with it directly
+//! unless you have need to name one of the iterator types.
+
+use crate::iter::plumbing::*;
+use crate::math::simplify_range;
+use crate::prelude::*;
+use std::ops::{Range, RangeBounds};
+
+impl<'a> ParallelDrainRange<usize> for &'a mut String {
+    type Iter = Drain<'a>;
+    type Item = char;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            range: simplify_range(range, self.len()),
+            string: self,
+        }
+    }
+}
+
+/// Draining parallel iterator that moves a range of characters out of a string,
+/// but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'a> {
+    string: &'a mut String,
+    range: Range<usize>,
+}
+
+impl<'a> ParallelIterator for Drain<'a> {
+    type Item = char;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        self.string[self.range.clone()]
+            .par_chars()
+            .drive_unindexed(consumer)
+    }
+}
+
+impl<'a> Drop for Drain<'a> {
+    fn drop(&mut self) {
+        // Remove the drained range.
+        self.string.drain(self.range.clone());
+    }
+}
diff --git a/src/vec.rs b/src/vec.rs
new file mode 100644
index 0000000..686673b
--- /dev/null
+++ b/src/vec.rs
@@ -0,0 +1,245 @@
+//! Parallel iterator types for [vectors][std::vec] (`Vec<T>`)
+//!
+//! You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::vec]: https://doc.rust-lang.org/stable/std/vec/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::simplify_range;
+use std::iter;
+use std::mem;
+use std::ops::{Range, RangeBounds};
+use std::ptr;
+use std::slice;
+
+/// Parallel iterator that moves out of a vector.
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    vec: Vec<T>,
+}
+
+impl<T: Send> IntoParallelIterator for Vec<T> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IntoIter { vec: self }
+    }
+}
+
+impl<T: Send> ParallelIterator for IntoIter<T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<T: Send> IndexedParallelIterator for IntoIter<T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.vec.len()
+    }
+
+    fn with_producer<CB>(mut self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        // Drain every item, and then the vector only needs to free its buffer.
+        self.vec.par_drain(..).with_producer(callback)
+    }
+}
+
+impl<'data, T: Send> ParallelDrainRange<usize> for &'data mut Vec<T> {
+    type Iter = Drain<'data, T>;
+    type Item = T;
+
+    fn par_drain<R: RangeBounds<usize>>(self, range: R) -> Self::Iter {
+        Drain {
+            orig_len: self.len(),
+            range: simplify_range(range, self.len()),
+            vec: self,
+        }
+    }
+}
+
+/// Draining parallel iterator that moves a range out of a vector, but keeps the total capacity.
+#[derive(Debug)]
+pub struct Drain<'data, T: Send> {
+    vec: &'data mut Vec<T>,
+    range: Range<usize>,
+    orig_len: usize,
+}
+
+impl<'data, T: Send> ParallelIterator for Drain<'data, T> {
+    type Item = T;
+
+    fn drive_unindexed<C>(self, consumer: C) -> C::Result
+    where
+        C: UnindexedConsumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn opt_len(&self) -> Option<usize> {
+        Some(self.len())
+    }
+}
+
+impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> {
+    fn drive<C>(self, consumer: C) -> C::Result
+    where
+        C: Consumer<Self::Item>,
+    {
+        bridge(self, consumer)
+    }
+
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+
+    fn with_producer<CB>(self, callback: CB) -> CB::Output
+    where
+        CB: ProducerCallback<Self::Item>,
+    {
+        unsafe {
+            // Make the vector forget about the drained items, and temporarily the tail too.
+            let start = self.range.start;
+            self.vec.set_len(start);
+
+            // Get a correct borrow lifetime, then extend it to the original length.
+            let mut slice = &mut self.vec[start..];
+            slice = slice::from_raw_parts_mut(slice.as_mut_ptr(), self.range.len());
+
+            // The producer will move or drop each item from the drained range.
+            callback.callback(DrainProducer::new(slice))
+        }
+    }
+}
+
+impl<'data, T: Send> Drop for Drain<'data, T> {
+    fn drop(&mut self) {
+        if self.range.len() > 0 {
+            let Range { start, end } = self.range;
+            if self.vec.len() != start {
+                // We must not have produced, so just call a normal drain to remove the items.
+                assert_eq!(self.vec.len(), self.orig_len);
+                self.vec.drain(start..end);
+            } else if end < self.orig_len {
+                // The producer was responsible for consuming the drained items.
+                // Move the tail items to their new place, then set the length to include them.
+                unsafe {
+                    let ptr = self.vec.as_mut_ptr().add(start);
+                    let tail_ptr = self.vec.as_ptr().add(end);
+                    let tail_len = self.orig_len - end;
+                    ptr::copy(tail_ptr, ptr, tail_len);
+                    self.vec.set_len(start + tail_len);
+                }
+            }
+        }
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+pub(crate) struct DrainProducer<'data, T: Send> {
+    slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> DrainProducer<'data, T> {
+    /// Creates a draining producer, which *moves* items from the slice.
+    ///
+    /// Unsafe bacause `!Copy` data must not be read after the borrow is released.
+    pub(crate) unsafe fn new(slice: &'data mut [T]) -> Self {
+        DrainProducer { slice }
+    }
+}
+
+impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
+    type Item = T;
+    type IntoIter = SliceDrain<'data, T>;
+
+    fn into_iter(mut self) -> Self::IntoIter {
+        // replace the slice so we don't drop it twice
+        let slice = mem::replace(&mut self.slice, &mut []);
+        SliceDrain {
+            iter: slice.iter_mut(),
+        }
+    }
+
+    fn split_at(mut self, index: usize) -> (Self, Self) {
+        // replace the slice so we don't drop it twice
+        let slice = mem::replace(&mut self.slice, &mut []);
+        let (left, right) = slice.split_at_mut(index);
+        unsafe { (DrainProducer::new(left), DrainProducer::new(right)) }
+    }
+}
+
+impl<'data, T: 'data + Send> Drop for DrainProducer<'data, T> {
+    fn drop(&mut self) {
+        // use `Drop for [T]`
+        unsafe { ptr::drop_in_place(self.slice) };
+    }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+
+// like std::vec::Drain, without updating a source Vec
+pub(crate) struct SliceDrain<'data, T> {
+    iter: slice::IterMut<'data, T>,
+}
+
+impl<'data, T: 'data> Iterator for SliceDrain<'data, T> {
+    type Item = T;
+
+    fn next(&mut self) -> Option<T> {
+        let ptr = self.iter.next()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    fn count(self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> DoubleEndedIterator for SliceDrain<'data, T> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let ptr = self.iter.next_back()?;
+        Some(unsafe { ptr::read(ptr) })
+    }
+}
+
+impl<'data, T: 'data> ExactSizeIterator for SliceDrain<'data, T> {
+    fn len(&self) -> usize {
+        self.iter.len()
+    }
+}
+
+impl<'data, T: 'data> iter::FusedIterator for SliceDrain<'data, T> {}
+
+impl<'data, T: 'data> Drop for SliceDrain<'data, T> {
+    fn drop(&mut self) {
+        // extract the iterator so we can use `Drop for [T]`
+        let iter = mem::replace(&mut self.iter, [].iter_mut());
+        unsafe { ptr::drop_in_place(iter.into_slice()) };
+    }
+}
diff --git a/tests/chars.rs b/tests/chars.rs
new file mode 100644
index 0000000..ac8e3f3
--- /dev/null
+++ b/tests/chars.rs
@@ -0,0 +1,39 @@
+use rayon::prelude::*;
+use std::char;
+
+#[test]
+fn half_open_correctness() {
+    let low = char::from_u32(0xD800 - 0x7).unwrap();
+    let high = char::from_u32(0xE000 + 0x7).unwrap();
+
+    let range = low..high;
+    let mut chars: Vec<char> = range.into_par_iter().collect();
+    chars.sort();
+
+    assert_eq!(
+        chars,
+        vec![
+            '\u{D7F9}', '\u{D7FA}', '\u{D7FB}', '\u{D7FC}', '\u{D7FD}', '\u{D7FE}', '\u{D7FF}',
+            '\u{E000}', '\u{E001}', '\u{E002}', '\u{E003}', '\u{E004}', '\u{E005}', '\u{E006}',
+        ]
+    );
+}
+
+#[test]
+fn closed_correctness() {
+    let low = char::from_u32(0xD800 - 0x7).unwrap();
+    let high = char::from_u32(0xE000 + 0x7).unwrap();
+
+    let range = low..=high;
+    let mut chars: Vec<char> = range.into_par_iter().collect();
+    chars.sort();
+
+    assert_eq!(
+        chars,
+        vec![
+            '\u{D7F9}', '\u{D7FA}', '\u{D7FB}', '\u{D7FC}', '\u{D7FD}', '\u{D7FE}', '\u{D7FF}',
+            '\u{E000}', '\u{E001}', '\u{E002}', '\u{E003}', '\u{E004}', '\u{E005}', '\u{E006}',
+            '\u{E007}',
+        ]
+    );
+}
diff --git a/tests/clones.rs b/tests/clones.rs
new file mode 100644
index 0000000..fa93f8a
--- /dev/null
+++ b/tests/clones.rs
@@ -0,0 +1,186 @@
+use rayon::prelude::*;
+
+fn check<I>(iter: I)
+where
+    I: ParallelIterator + Clone,
+    I::Item: std::fmt::Debug + PartialEq,
+{
+    let a: Vec<_> = iter.clone().collect();
+    let b: Vec<_> = iter.collect();
+    assert_eq!(a, b);
+}
+
+#[test]
+fn clone_binary_heap() {
+    use std::collections::BinaryHeap;
+    let heap: BinaryHeap<_> = (0..1000).collect();
+    check(heap.par_iter());
+    check(heap.into_par_iter());
+}
+
+#[test]
+fn clone_btree_map() {
+    use std::collections::BTreeMap;
+    let map: BTreeMap<_, _> = (0..1000).enumerate().collect();
+    check(map.par_iter());
+}
+
+#[test]
+fn clone_btree_set() {
+    use std::collections::BTreeSet;
+    let set: BTreeSet<_> = (0..1000).collect();
+    check(set.par_iter());
+}
+
+#[test]
+fn clone_hash_map() {
+    use std::collections::HashMap;
+    let map: HashMap<_, _> = (0..1000).enumerate().collect();
+    check(map.par_iter());
+}
+
+#[test]
+fn clone_hash_set() {
+    use std::collections::HashSet;
+    let set: HashSet<_> = (0..1000).collect();
+    check(set.par_iter());
+}
+
+#[test]
+fn clone_linked_list() {
+    use std::collections::LinkedList;
+    let list: LinkedList<_> = (0..1000).collect();
+    check(list.par_iter());
+    check(list.into_par_iter());
+}
+
+#[test]
+fn clone_vec_deque() {
+    use std::collections::VecDeque;
+    let deque: VecDeque<_> = (0..1000).collect();
+    check(deque.par_iter());
+    check(deque.into_par_iter());
+}
+
+#[test]
+fn clone_option() {
+    let option = Some(0);
+    check(option.par_iter());
+    check(option.into_par_iter());
+}
+
+#[test]
+fn clone_result() {
+    let result = Ok::<_, ()>(0);
+    check(result.par_iter());
+    check(result.into_par_iter());
+}
+
+#[test]
+fn clone_range() {
+    check((0..1000).into_par_iter());
+}
+
+#[test]
+fn clone_range_inclusive() {
+    check((0..=1000).into_par_iter());
+}
+
+#[test]
+fn clone_str() {
+    let s = include_str!("clones.rs");
+    check(s.par_chars());
+    check(s.par_lines());
+    check(s.par_split('\n'));
+    check(s.par_split_terminator('\n'));
+    check(s.par_split_whitespace());
+}
+
+#[test]
+fn clone_vec() {
+    let v: Vec<_> = (0..1000).collect();
+    check(v.par_iter());
+    check(v.par_chunks(42));
+    check(v.par_chunks_exact(42));
+    check(v.par_windows(42));
+    check(v.par_split(|x| x % 3 == 0));
+    check(v.into_par_iter());
+}
+
+#[test]
+fn clone_adaptors() {
+    let v: Vec<_> = (0..1000).map(Some).collect();
+    check(v.par_iter().chain(&v));
+    check(v.par_iter().cloned());
+    check(v.par_iter().copied());
+    check(v.par_iter().enumerate());
+    check(v.par_iter().filter(|_| true));
+    check(v.par_iter().filter_map(|x| *x));
+    check(v.par_iter().flat_map(|x| *x));
+    check(v.par_iter().flat_map_iter(|x| *x));
+    check(v.par_iter().flatten());
+    check(v.par_iter().flatten_iter());
+    check(v.par_iter().with_max_len(1).fold(|| 0, |x, _| x));
+    check(v.par_iter().with_max_len(1).fold_with(0, |x, _| x));
+    check(v.par_iter().with_max_len(1).try_fold(|| 0, |_, &x| x));
+    check(v.par_iter().with_max_len(1).try_fold_with(0, |_, &x| x));
+    check(v.par_iter().inspect(|_| ()));
+    check(v.par_iter().update(|_| ()));
+    check(v.par_iter().interleave(&v));
+    check(v.par_iter().interleave_shortest(&v));
+    check(v.par_iter().intersperse(&None));
+    check(v.par_iter().chunks(3));
+    check(v.par_iter().map(|x| x));
+    check(v.par_iter().map_with(0, |_, x| x));
+    check(v.par_iter().map_init(|| 0, |_, x| x));
+    check(v.par_iter().panic_fuse());
+    check(v.par_iter().positions(|_| true));
+    check(v.par_iter().rev());
+    check(v.par_iter().skip(1));
+    check(v.par_iter().take(1));
+    check(v.par_iter().cloned().while_some());
+    check(v.par_iter().with_max_len(1));
+    check(v.par_iter().with_min_len(1));
+    check(v.par_iter().zip(&v));
+    check(v.par_iter().zip_eq(&v));
+    check(v.par_iter().step_by(2));
+}
+
+#[test]
+fn clone_empty() {
+    check(rayon::iter::empty::<i32>());
+}
+
+#[test]
+fn clone_once() {
+    check(rayon::iter::once(10));
+}
+
+#[test]
+fn clone_repeat() {
+    let x: Option<i32> = None;
+    check(rayon::iter::repeat(x).while_some());
+    check(rayon::iter::repeatn(x, 1000));
+}
+
+#[test]
+fn clone_splitter() {
+    check(rayon::iter::split(0..1000, |x| (x, None)));
+}
+
+#[test]
+fn clone_multizip() {
+    let v: &Vec<_> = &(0..1000).collect();
+    check((v,).into_par_iter());
+    check((v, v).into_par_iter());
+    check((v, v, v).into_par_iter());
+    check((v, v, v, v).into_par_iter());
+    check((v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+}
diff --git a/tests/collect.rs b/tests/collect.rs
new file mode 100644
index 0000000..48b80f6
--- /dev/null
+++ b/tests/collect.rs
@@ -0,0 +1,111 @@
+use rayon::prelude::*;
+
+use std::panic;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use std::sync::Mutex;
+
+#[test]
+fn collect_drop_on_unwind() {
+    struct Recorddrop<'a>(i64, &'a Mutex<Vec<i64>>);
+
+    impl<'a> Drop for Recorddrop<'a> {
+        fn drop(&mut self) {
+            self.1.lock().unwrap().push(self.0);
+        }
+    }
+
+    let test_collect_panic = |will_panic: bool| {
+        let test_vec_len = 1024;
+        let panic_point = 740;
+
+        let mut inserts = Mutex::new(Vec::new());
+        let mut drops = Mutex::new(Vec::new());
+
+        let mut a = (0..test_vec_len).collect::<Vec<_>>();
+        let b = (0..test_vec_len).collect::<Vec<_>>();
+
+        let _result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+            let mut result = Vec::new();
+            a.par_iter_mut()
+                .zip(&b)
+                .map(|(&mut a, &b)| {
+                    if a > panic_point && will_panic {
+                        panic!("unwinding for test");
+                    }
+                    let elt = a + b;
+                    inserts.lock().unwrap().push(elt);
+                    Recorddrop(elt, &drops)
+                })
+                .collect_into_vec(&mut result);
+
+            // If we reach this point, this must pass
+            assert_eq!(a.len(), result.len());
+        }));
+
+        let inserts = inserts.get_mut().unwrap();
+        let drops = drops.get_mut().unwrap();
+        println!("{:?}", inserts);
+        println!("{:?}", drops);
+
+        assert_eq!(inserts.len(), drops.len(), "Incorrect number of drops");
+        // sort to normalize order
+        inserts.sort();
+        drops.sort();
+        assert_eq!(inserts, drops, "Incorrect elements were dropped");
+    };
+
+    for &should_panic in &[true, false] {
+        test_collect_panic(should_panic);
+    }
+}
+
+#[test]
+fn collect_drop_on_unwind_zst() {
+    static INSERTS: AtomicUsize = AtomicUsize::new(0);
+    static DROPS: AtomicUsize = AtomicUsize::new(0);
+
+    struct RecorddropZst;
+
+    impl Drop for RecorddropZst {
+        fn drop(&mut self) {
+            DROPS.fetch_add(1, Ordering::SeqCst);
+        }
+    }
+
+    let test_collect_panic = |will_panic: bool| {
+        INSERTS.store(0, Ordering::SeqCst);
+        DROPS.store(0, Ordering::SeqCst);
+
+        let test_vec_len = 1024;
+        let panic_point = 740;
+
+        let a = (0..test_vec_len).collect::<Vec<_>>();
+
+        let _result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
+            let mut result = Vec::new();
+            a.par_iter()
+                .map(|&a| {
+                    if a > panic_point && will_panic {
+                        panic!("unwinding for test");
+                    }
+                    INSERTS.fetch_add(1, Ordering::SeqCst);
+                    RecorddropZst
+                })
+                .collect_into_vec(&mut result);
+
+            // If we reach this point, this must pass
+            assert_eq!(a.len(), result.len());
+        }));
+
+        let inserts = INSERTS.load(Ordering::SeqCst);
+        let drops = DROPS.load(Ordering::SeqCst);
+
+        assert_eq!(inserts, drops, "Incorrect number of drops");
+        assert!(will_panic || drops == test_vec_len)
+    };
+
+    for &should_panic in &[true, false] {
+        test_collect_panic(should_panic);
+    }
+}
diff --git a/tests/cross-pool.rs b/tests/cross-pool.rs
new file mode 100644
index 0000000..f0a2128
--- /dev/null
+++ b/tests/cross-pool.rs
@@ -0,0 +1,21 @@
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+
+#[test]
+fn cross_pool_busy() {
+    let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+    let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    let n: i32 = 100;
+    let sum: i32 = pool1.install(move || {
+        // Each item will block on pool2, but pool1 can continue processing other work from the
+        // parallel iterator in the meantime. There's a chance that pool1 will still be awake to
+        // see the latch set without being tickled, and then it will drop that stack job. The latch
+        // internals must not assume that the job will still be alive after it's set!
+        (1..=n)
+            .into_par_iter()
+            .map(|i| pool2.install(move || i))
+            .sum()
+    });
+    assert_eq!(sum, n * (n + 1) / 2);
+}
diff --git a/tests/debug.rs b/tests/debug.rs
new file mode 100644
index 0000000..fb11110
--- /dev/null
+++ b/tests/debug.rs
@@ -0,0 +1,209 @@
+use rayon::prelude::*;
+use std::fmt::Debug;
+
+fn check<I>(iter: I)
+where
+    I: ParallelIterator + Debug,
+{
+    println!("{:?}", iter);
+}
+
+#[test]
+fn debug_binary_heap() {
+    use std::collections::BinaryHeap;
+    let mut heap: BinaryHeap<_> = (0..10).collect();
+    check(heap.par_iter());
+    check(heap.par_drain());
+    check(heap.into_par_iter());
+}
+
+#[test]
+fn debug_btree_map() {
+    use std::collections::BTreeMap;
+    let mut map: BTreeMap<_, _> = (0..10).enumerate().collect();
+    check(map.par_iter());
+    check(map.par_iter_mut());
+    check(map.into_par_iter());
+}
+
+#[test]
+fn debug_btree_set() {
+    use std::collections::BTreeSet;
+    let set: BTreeSet<_> = (0..10).collect();
+    check(set.par_iter());
+    check(set.into_par_iter());
+}
+
+#[test]
+fn debug_hash_map() {
+    use std::collections::HashMap;
+    let mut map: HashMap<_, _> = (0..10).enumerate().collect();
+    check(map.par_iter());
+    check(map.par_iter_mut());
+    check(map.par_drain());
+    check(map.into_par_iter());
+}
+
+#[test]
+fn debug_hash_set() {
+    use std::collections::HashSet;
+    let mut set: HashSet<_> = (0..10).collect();
+    check(set.par_iter());
+    check(set.par_drain());
+    check(set.into_par_iter());
+}
+
+#[test]
+fn debug_linked_list() {
+    use std::collections::LinkedList;
+    let mut list: LinkedList<_> = (0..10).collect();
+    check(list.par_iter());
+    check(list.par_iter_mut());
+    check(list.into_par_iter());
+}
+
+#[test]
+fn debug_vec_deque() {
+    use std::collections::VecDeque;
+    let mut deque: VecDeque<_> = (0..10).collect();
+    check(deque.par_iter());
+    check(deque.par_iter_mut());
+    check(deque.par_drain(..));
+    check(deque.into_par_iter());
+}
+
+#[test]
+fn debug_option() {
+    let mut option = Some(0);
+    check(option.par_iter());
+    check(option.par_iter_mut());
+    check(option.into_par_iter());
+}
+
+#[test]
+fn debug_result() {
+    let mut result = Ok::<_, ()>(0);
+    check(result.par_iter());
+    check(result.par_iter_mut());
+    check(result.into_par_iter());
+}
+
+#[test]
+fn debug_range() {
+    check((0..10).into_par_iter());
+}
+
+#[test]
+fn debug_range_inclusive() {
+    check((0..=10).into_par_iter());
+}
+
+#[test]
+fn debug_str() {
+    let s = "a b c d\ne f g";
+    check(s.par_chars());
+    check(s.par_lines());
+    check(s.par_split('\n'));
+    check(s.par_split_terminator('\n'));
+    check(s.par_split_whitespace());
+}
+
+#[test]
+fn debug_string() {
+    let mut s = "a b c d\ne f g".to_string();
+    s.par_drain(..);
+}
+
+#[test]
+fn debug_vec() {
+    let mut v: Vec<_> = (0..10).collect();
+    check(v.par_iter());
+    check(v.par_iter_mut());
+    check(v.par_chunks(42));
+    check(v.par_chunks_exact(42));
+    check(v.par_chunks_mut(42));
+    check(v.par_chunks_exact_mut(42));
+    check(v.par_windows(42));
+    check(v.par_split(|x| x % 3 == 0));
+    check(v.par_split_mut(|x| x % 3 == 0));
+    check(v.par_drain(..));
+    check(v.into_par_iter());
+}
+
+#[test]
+fn debug_adaptors() {
+    let v: Vec<_> = (0..10).collect();
+    check(v.par_iter().chain(&v));
+    check(v.par_iter().cloned());
+    check(v.par_iter().copied());
+    check(v.par_iter().enumerate());
+    check(v.par_iter().filter(|_| true));
+    check(v.par_iter().filter_map(Some));
+    check(v.par_iter().flat_map(Some));
+    check(v.par_iter().flat_map_iter(Some));
+    check(v.par_iter().map(Some).flatten());
+    check(v.par_iter().map(Some).flatten_iter());
+    check(v.par_iter().fold(|| 0, |x, _| x));
+    check(v.par_iter().fold_with(0, |x, _| x));
+    check(v.par_iter().try_fold(|| 0, |x, _| Some(x)));
+    check(v.par_iter().try_fold_with(0, |x, _| Some(x)));
+    check(v.par_iter().inspect(|_| ()));
+    check(v.par_iter().update(|_| ()));
+    check(v.par_iter().interleave(&v));
+    check(v.par_iter().interleave_shortest(&v));
+    check(v.par_iter().intersperse(&-1));
+    check(v.par_iter().chunks(3));
+    check(v.par_iter().map(|x| x));
+    check(v.par_iter().map_with(0, |_, x| x));
+    check(v.par_iter().map_init(|| 0, |_, x| x));
+    check(v.par_iter().panic_fuse());
+    check(v.par_iter().positions(|_| true));
+    check(v.par_iter().rev());
+    check(v.par_iter().skip(1));
+    check(v.par_iter().take(1));
+    check(v.par_iter().map(Some).while_some());
+    check(v.par_iter().with_max_len(1));
+    check(v.par_iter().with_min_len(1));
+    check(v.par_iter().zip(&v));
+    check(v.par_iter().zip_eq(&v));
+    check(v.par_iter().step_by(2));
+}
+
+#[test]
+fn debug_empty() {
+    check(rayon::iter::empty::<i32>());
+}
+
+#[test]
+fn debug_once() {
+    check(rayon::iter::once(10));
+}
+
+#[test]
+fn debug_repeat() {
+    let x: Option<i32> = None;
+    check(rayon::iter::repeat(x));
+    check(rayon::iter::repeatn(x, 10));
+}
+
+#[test]
+fn debug_splitter() {
+    check(rayon::iter::split(0..10, |x| (x, None)));
+}
+
+#[test]
+fn debug_multizip() {
+    let v: &Vec<_> = &(0..10).collect();
+    check((v,).into_par_iter());
+    check((v, v).into_par_iter());
+    check((v, v, v).into_par_iter());
+    check((v, v, v, v).into_par_iter());
+    check((v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+    check((v, v, v, v, v, v, v, v, v, v, v, v).into_par_iter());
+}
diff --git a/tests/intersperse.rs b/tests/intersperse.rs
new file mode 100644
index 0000000..aaa5b65
--- /dev/null
+++ b/tests/intersperse.rs
@@ -0,0 +1,60 @@
+use rayon::prelude::*;
+
+#[test]
+fn check_intersperse() {
+    let v: Vec<_> = (0..1000).into_par_iter().intersperse(-1).collect();
+    assert_eq!(v.len(), 1999);
+    for (i, x) in v.into_iter().enumerate() {
+        assert_eq!(x, if i % 2 == 0 { i as i32 / 2 } else { -1 });
+    }
+}
+
+#[test]
+fn check_intersperse_again() {
+    let v: Vec<_> = (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .intersperse(-2)
+        .collect();
+    assert_eq!(v.len(), 3997);
+    for (i, x) in v.into_iter().enumerate() {
+        let y = match i % 4 {
+            0 => i as i32 / 4,
+            2 => -1,
+            _ => -2,
+        };
+        assert_eq!(x, y);
+    }
+}
+
+#[test]
+fn check_intersperse_unindexed() {
+    let v: Vec<_> = (0..1000).map(|i| i.to_string()).collect();
+    let s = v.join(",");
+    let s2 = v.join(";");
+    let par: String = s.par_split(',').intersperse(";").collect();
+    assert_eq!(par, s2);
+}
+
+#[test]
+fn check_intersperse_producer() {
+    (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .zip_eq(0..1999)
+        .for_each(|(x, i)| {
+            assert_eq!(x, if i % 2 == 0 { i / 2 } else { -1 });
+        });
+}
+
+#[test]
+fn check_intersperse_rev() {
+    (0..1000)
+        .into_par_iter()
+        .intersperse(-1)
+        .zip_eq(0..1999)
+        .rev()
+        .for_each(|(x, i)| {
+            assert_eq!(x, if i % 2 == 0 { i / 2 } else { -1 });
+        });
+}
diff --git a/tests/issue671-unzip.rs b/tests/issue671-unzip.rs
new file mode 100644
index 0000000..c9af7e6
--- /dev/null
+++ b/tests/issue671-unzip.rs
@@ -0,0 +1,17 @@
+#![type_length_limit = "10000"]
+
+use rayon::prelude::*;
+
+#[test]
+fn type_length_limit() {
+    let input = vec![1, 2, 3, 4, 5];
+    let (indexes, (squares, cubes)): (Vec<_>, (Vec<_>, Vec<_>)) = input
+        .par_iter()
+        .map(|x| (x * x, x * x * x))
+        .enumerate()
+        .unzip();
+
+    drop(indexes);
+    drop(squares);
+    drop(cubes);
+}
diff --git a/tests/issue671.rs b/tests/issue671.rs
new file mode 100644
index 0000000..9d0953f
--- /dev/null
+++ b/tests/issue671.rs
@@ -0,0 +1,16 @@
+#![type_length_limit = "500000"]
+
+use rayon::prelude::*;
+
+#[test]
+fn type_length_limit() {
+    let _ = Vec::<Result<(), ()>>::new()
+        .into_par_iter()
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .map(|x| x)
+        .collect::<Result<(), ()>>();
+}
diff --git a/tests/iter_panic.rs b/tests/iter_panic.rs
new file mode 100644
index 0000000..4885a28
--- /dev/null
+++ b/tests/iter_panic.rs
@@ -0,0 +1,52 @@
+use rayon::prelude::*;
+use rayon::ThreadPoolBuilder;
+use std::ops::Range;
+use std::panic::{self, UnwindSafe};
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+const ITER: Range<i32> = 0..0x1_0000;
+const PANIC: i32 = 0xC000;
+
+fn check(&i: &i32) {
+    if i == PANIC {
+        panic!("boom")
+    }
+}
+
+#[test]
+#[should_panic(expected = "boom")]
+fn iter_panic() {
+    ITER.into_par_iter().for_each(|i| check(&i));
+}
+
+#[test]
+fn iter_panic_fuse() {
+    // We only use a single thread in order to make the behavior
+    // of 'panic_fuse' deterministic
+    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
+
+    pool.install(|| {
+        fn count(iter: impl ParallelIterator + UnwindSafe) -> usize {
+            let count = AtomicUsize::new(0);
+            let result = panic::catch_unwind(|| {
+                iter.for_each(|_| {
+                    count.fetch_add(1, Ordering::Relaxed);
+                });
+            });
+            assert!(result.is_err());
+            count.into_inner()
+        }
+
+        // Without `panic_fuse()`, we'll reach every item except the panicking one.
+        let expected = ITER.len() - 1;
+        let iter = ITER.into_par_iter().with_max_len(1);
+        assert_eq!(count(iter.clone().inspect(check)), expected);
+
+        // With `panic_fuse()` anywhere in the chain, we'll reach fewer items.
+        assert!(count(iter.clone().inspect(check).panic_fuse()) < expected);
+        assert!(count(iter.clone().panic_fuse().inspect(check)) < expected);
+
+        // Try in reverse to be sure we hit the producer case.
+        assert!(count(iter.clone().panic_fuse().inspect(check).rev()) < expected);
+    });
+}
diff --git a/tests/named-threads.rs b/tests/named-threads.rs
new file mode 100644
index 0000000..fd1b0be
--- /dev/null
+++ b/tests/named-threads.rs
@@ -0,0 +1,24 @@
+use std::collections::HashSet;
+
+use rayon::prelude::*;
+use rayon::*;
+
+#[test]
+fn named_threads() {
+    ThreadPoolBuilder::new()
+        .thread_name(|i| format!("hello-name-test-{}", i))
+        .build_global()
+        .unwrap();
+
+    const N: usize = 10000;
+
+    let thread_names = (0..N)
+        .into_par_iter()
+        .flat_map(|_| ::std::thread::current().name().map(str::to_owned))
+        .collect::<HashSet<String>>();
+
+    let all_contains_name = thread_names
+        .iter()
+        .all(|name| name.starts_with("hello-name-test-"));
+    assert!(all_contains_name);
+}
diff --git a/tests/octillion.rs b/tests/octillion.rs
new file mode 100644
index 0000000..cff2b11
--- /dev/null
+++ b/tests/octillion.rs
@@ -0,0 +1,130 @@
+use rayon::prelude::*;
+
+const OCTILLION: u128 = 1_000_000_000_000_000_000_000_000_000;
+
+/// Produce a parallel iterator for 0u128..10²⁷
+fn octillion() -> rayon::range::Iter<u128> {
+    (0..OCTILLION).into_par_iter()
+}
+
+/// Produce a parallel iterator for 0u128..=10²⁷
+fn octillion_inclusive() -> rayon::range_inclusive::Iter<u128> {
+    (0..=OCTILLION).into_par_iter()
+}
+
+/// Produce a parallel iterator for 0u128..10²⁷ using `flat_map`
+fn octillion_flat() -> impl ParallelIterator<Item = u128> {
+    (0u32..1_000_000_000)
+        .into_par_iter()
+        .with_max_len(1_000)
+        .map(|i| u64::from(i) * 1_000_000_000)
+        .flat_map(|i| {
+            (0u32..1_000_000_000)
+                .into_par_iter()
+                .with_max_len(1_000)
+                .map(move |j| i + u64::from(j))
+        })
+        .map(|i| u128::from(i) * 1_000_000_000)
+        .flat_map(|i| {
+            (0u32..1_000_000_000)
+                .into_par_iter()
+                .with_max_len(1_000)
+                .map(move |j| i + u128::from(j))
+        })
+}
+
+// NOTE: `find_first` and `find_last` currently take too long on 32-bit targets,
+// because the `AtomicUsize` match position has much too limited resolution.
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion() {
+    let x = octillion().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion_inclusive() {
+    let x = octillion_inclusive().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_first_octillion_flat() {
+    let x = octillion_flat().find_first(|_| true);
+    assert_eq!(x, Some(0));
+}
+
+fn two_threads<F: Send + FnOnce() -> R, R: Send>(f: F) -> R {
+    // FIXME: If we don't use at least two threads, then we end up walking
+    // through the entire iterator sequentially, without the benefit of any
+    // short-circuiting.  We probably don't want testing to wait that long. ;)
+    let builder = rayon::ThreadPoolBuilder::new().num_threads(2);
+    let pool = builder.build().unwrap();
+
+    pool.install(f)
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion() {
+    // It would be nice if `find_last` could prioritize the later splits,
+    // basically flipping the `join` args, without needing indexed `rev`.
+    // (or could we have an unindexed `rev`?)
+    let x = two_threads(|| octillion().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION - 1));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion_inclusive() {
+    let x = two_threads(|| octillion_inclusive().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION));
+}
+
+#[test]
+#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+fn find_last_octillion_flat() {
+    let x = two_threads(|| octillion_flat().find_last(|_| true));
+    assert_eq!(x, Some(OCTILLION - 1));
+}
+
+#[test]
+fn find_any_octillion() {
+    let x = two_threads(|| octillion().find_any(|x| *x > OCTILLION / 2));
+    assert!(x.is_some());
+}
+
+#[test]
+fn find_any_octillion_flat() {
+    let x = two_threads(|| octillion_flat().find_any(|x| *x > OCTILLION / 2));
+    assert!(x.is_some());
+}
+
+#[test]
+fn filter_find_any_octillion() {
+    let x = two_threads(|| {
+        octillion()
+            .filter(|x| *x > OCTILLION / 2)
+            .find_any(|_| true)
+    });
+    assert!(x.is_some());
+}
+
+#[test]
+fn filter_find_any_octillion_flat() {
+    let x = two_threads(|| {
+        octillion_flat()
+            .filter(|x| *x > OCTILLION / 2)
+            .find_any(|_| true)
+    });
+    assert!(x.is_some());
+}
+
+#[test]
+fn fold_find_any_octillion_flat() {
+    let x = two_threads(|| octillion_flat().fold(|| (), |_, _| ()).find_any(|_| true));
+    assert!(x.is_some());
+}
diff --git a/tests/producer_split_at.rs b/tests/producer_split_at.rs
new file mode 100644
index 0000000..752bc3e
--- /dev/null
+++ b/tests/producer_split_at.rs
@@ -0,0 +1,344 @@
+use rayon::iter::plumbing::*;
+use rayon::prelude::*;
+
+/// Stress-test indexes for `Producer::split_at`.
+fn check<F, I>(expected: &[I::Item], mut f: F)
+where
+    F: FnMut() -> I,
+    I: IntoParallelIterator,
+    I::Iter: IndexedParallelIterator,
+    I::Item: PartialEq + std::fmt::Debug,
+{
+    map_triples(expected.len() + 1, |i, j, k| {
+        Split::forward(f(), i, j, k, expected);
+        Split::reverse(f(), i, j, k, expected);
+    });
+}
+
+fn map_triples<F>(end: usize, mut f: F)
+where
+    F: FnMut(usize, usize, usize),
+{
+    for i in 0..end {
+        for j in i..end {
+            for k in j..end {
+                f(i, j, k);
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+struct Split {
+    i: usize,
+    j: usize,
+    k: usize,
+    reverse: bool,
+}
+
+impl Split {
+    fn forward<I>(iter: I, i: usize, j: usize, k: usize, expected: &[I::Item])
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        I::Item: PartialEq + std::fmt::Debug,
+    {
+        let result = iter.into_par_iter().with_producer(Split {
+            i,
+            j,
+            k,
+            reverse: false,
+        });
+        assert_eq!(result, expected);
+    }
+
+    fn reverse<I>(iter: I, i: usize, j: usize, k: usize, expected: &[I::Item])
+    where
+        I: IntoParallelIterator,
+        I::Iter: IndexedParallelIterator,
+        I::Item: PartialEq + std::fmt::Debug,
+    {
+        let result = iter.into_par_iter().with_producer(Split {
+            i,
+            j,
+            k,
+            reverse: true,
+        });
+        assert!(result.iter().eq(expected.iter().rev()));
+    }
+}
+
+impl<T> ProducerCallback<T> for Split {
+    type Output = Vec<T>;
+
+    fn callback<P>(self, producer: P) -> Self::Output
+    where
+        P: Producer<Item = T>,
+    {
+        println!("{:?}", self);
+
+        // Splitting the outer indexes first gets us an arbitrary mid section,
+        // which we then split further to get full test coverage.
+        let (left, d) = producer.split_at(self.k);
+        let (a, mid) = left.split_at(self.i);
+        let (b, c) = mid.split_at(self.j - self.i);
+
+        let a = a.into_iter();
+        let b = b.into_iter();
+        let c = c.into_iter();
+        let d = d.into_iter();
+
+        check_len(&a, self.i);
+        check_len(&b, self.j - self.i);
+        check_len(&c, self.k - self.j);
+
+        let chain = a.chain(b).chain(c).chain(d);
+        if self.reverse {
+            chain.rev().collect()
+        } else {
+            chain.collect()
+        }
+    }
+}
+
+fn check_len<I: ExactSizeIterator>(iter: &I, len: usize) {
+    assert_eq!(iter.size_hint(), (len, Some(len)));
+    assert_eq!(iter.len(), len);
+}
+
+// **** Base Producers ****
+
+#[test]
+fn empty() {
+    let v = vec![42];
+    check(&v[..0], rayon::iter::empty);
+}
+
+#[test]
+fn once() {
+    let v = vec![42];
+    check(&v, || rayon::iter::once(42));
+}
+
+#[test]
+fn option() {
+    let v = vec![42];
+    check(&v, || Some(42));
+}
+
+#[test]
+fn range() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || 0..10);
+}
+
+#[test]
+fn range_inclusive() {
+    let v: Vec<_> = (0u16..=10).collect();
+    check(&v, || 0u16..=10);
+}
+
+#[test]
+fn repeatn() {
+    let v: Vec<_> = std::iter::repeat(1).take(5).collect();
+    check(&v, || rayon::iter::repeatn(1, 5));
+}
+
+#[test]
+fn slice_iter() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.iter().collect();
+    check(&v, || &s);
+}
+
+#[test]
+fn slice_iter_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    let expected: Vec<_> = v.iter_mut().collect();
+
+    map_triples(expected.len() + 1, |i, j, k| {
+        Split::forward(s.par_iter_mut(), i, j, k, &expected);
+        Split::reverse(s.par_iter_mut(), i, j, k, &expected);
+    });
+}
+
+#[test]
+fn slice_chunks() {
+    let s: Vec<_> = (0..10).collect();
+    for len in 1..s.len() + 2 {
+        let v: Vec<_> = s.chunks(len).collect();
+        check(&v, || s.par_chunks(len));
+    }
+}
+
+#[test]
+fn slice_chunks_exact() {
+    let s: Vec<_> = (0..10).collect();
+    for len in 1..s.len() + 2 {
+        let v: Vec<_> = s.chunks_exact(len).collect();
+        check(&v, || s.par_chunks_exact(len));
+    }
+}
+
+#[test]
+fn slice_chunks_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    for len in 1..s.len() + 2 {
+        let expected: Vec<_> = v.chunks_mut(len).collect();
+        map_triples(expected.len() + 1, |i, j, k| {
+            Split::forward(s.par_chunks_mut(len), i, j, k, &expected);
+            Split::reverse(s.par_chunks_mut(len), i, j, k, &expected);
+        });
+    }
+}
+
+#[test]
+fn slice_chunks_exact_mut() {
+    let mut s: Vec<_> = (0..10).collect();
+    let mut v: Vec<_> = s.clone();
+    for len in 1..s.len() + 2 {
+        let expected: Vec<_> = v.chunks_exact_mut(len).collect();
+        map_triples(expected.len() + 1, |i, j, k| {
+            Split::forward(s.par_chunks_exact_mut(len), i, j, k, &expected);
+            Split::reverse(s.par_chunks_exact_mut(len), i, j, k, &expected);
+        });
+    }
+}
+
+#[test]
+fn slice_windows() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.windows(2).collect();
+    check(&v, || s.par_windows(2));
+}
+
+#[test]
+fn vec() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.clone());
+}
+
+// **** Adaptors ****
+
+#[test]
+fn chain() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..5).into_par_iter().chain(5..10));
+}
+
+#[test]
+fn cloned() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().cloned());
+}
+
+#[test]
+fn copied() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().copied());
+}
+
+#[test]
+fn enumerate() {
+    let v: Vec<_> = (0..10).enumerate().collect();
+    check(&v, || (0..10).into_par_iter().enumerate());
+}
+
+#[test]
+fn step_by() {
+    let v: Vec<_> = (0..10).step_by(2).collect();
+    check(&v, || (0..10).into_par_iter().step_by(2))
+}
+
+#[test]
+fn step_by_unaligned() {
+    let v: Vec<_> = (0..10).step_by(3).collect();
+    check(&v, || (0..10).into_par_iter().step_by(3))
+}
+
+#[test]
+fn inspect() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().inspect(|_| ()));
+}
+
+#[test]
+fn update() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().update(|_| ()));
+}
+
+#[test]
+fn interleave() {
+    let v = [0, 10, 1, 11, 2, 12, 3, 4];
+    check(&v, || (0..5).into_par_iter().interleave(10..13));
+    check(&v[..6], || (0..3).into_par_iter().interleave(10..13));
+
+    let v = [0, 10, 1, 11, 2, 12, 13, 14];
+    check(&v, || (0..3).into_par_iter().interleave(10..15));
+}
+
+#[test]
+fn intersperse() {
+    let v = [0, -1, 1, -1, 2, -1, 3, -1, 4];
+    check(&v, || (0..5).into_par_iter().intersperse(-1));
+}
+
+#[test]
+fn chunks() {
+    let s: Vec<_> = (0..10).collect();
+    let v: Vec<_> = s.chunks(2).map(|c| c.to_vec()).collect();
+    check(&v, || s.par_iter().cloned().chunks(2));
+}
+
+#[test]
+fn map() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map(Clone::clone));
+}
+
+#[test]
+fn map_with() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map_with(vec![0], |_, &x| x));
+}
+
+#[test]
+fn map_init() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || v.par_iter().map_init(|| vec![0], |_, &x| x));
+}
+
+#[test]
+fn panic_fuse() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().panic_fuse());
+}
+
+#[test]
+fn rev() {
+    let v: Vec<_> = (0..10).rev().collect();
+    check(&v, || (0..10).into_par_iter().rev());
+}
+
+#[test]
+fn with_max_len() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().with_max_len(1));
+}
+
+#[test]
+fn with_min_len() {
+    let v: Vec<_> = (0..10).collect();
+    check(&v, || (0..10).into_par_iter().with_min_len(1));
+}
+
+#[test]
+fn zip() {
+    let v: Vec<_> = (0..10).zip(10..20).collect();
+    check(&v, || (0..10).into_par_iter().zip(10..20));
+    check(&v[..5], || (0..5).into_par_iter().zip(10..20));
+    check(&v[..5], || (0..10).into_par_iter().zip(10..15));
+}
diff --git a/tests/sort-panic-safe.rs b/tests/sort-panic-safe.rs
new file mode 100644
index 0000000..7c50505
--- /dev/null
+++ b/tests/sort-panic-safe.rs
@@ -0,0 +1,162 @@
+use rand::distributions::Uniform;
+use rand::{thread_rng, Rng};
+use rayon::prelude::*;
+use std::cell::Cell;
+use std::cmp::{self, Ordering};
+use std::panic;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::Relaxed;
+use std::thread;
+
+static VERSIONS: AtomicUsize = AtomicUsize::new(0);
+
+lazy_static::lazy_static! {
+    static ref DROP_COUNTS: Vec<AtomicUsize> = (0..20_000).map(|_| AtomicUsize::new(0)).collect();
+}
+
+#[derive(Clone, Eq)]
+struct DropCounter {
+    x: u32,
+    id: usize,
+    version: Cell<usize>,
+}
+
+impl PartialEq for DropCounter {
+    fn eq(&self, other: &Self) -> bool {
+        self.partial_cmp(other) == Some(Ordering::Equal)
+    }
+}
+
+impl PartialOrd for DropCounter {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.version.set(self.version.get() + 1);
+        other.version.set(other.version.get() + 1);
+        VERSIONS.fetch_add(2, Relaxed);
+        self.x.partial_cmp(&other.x)
+    }
+}
+
+impl Ord for DropCounter {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.partial_cmp(other).unwrap()
+    }
+}
+
+impl Drop for DropCounter {
+    fn drop(&mut self) {
+        DROP_COUNTS[self.id].fetch_add(1, Relaxed);
+        VERSIONS.fetch_sub(self.version.get(), Relaxed);
+    }
+}
+
+macro_rules! test {
+    ($input:ident, $func:ident) => {
+        let len = $input.len();
+
+        // Work out the total number of comparisons required to sort
+        // this array...
+        let count = AtomicUsize::new(0);
+        $input.to_owned().$func(|a, b| {
+            count.fetch_add(1, Relaxed);
+            a.cmp(b)
+        });
+
+        let mut panic_countdown = count.load(Relaxed);
+        let step = if len <= 100 {
+            1
+        } else {
+            cmp::max(1, panic_countdown / 10)
+        };
+
+        // ... and then panic after each `step` comparisons.
+        loop {
+            // Refresh the counters.
+            VERSIONS.store(0, Relaxed);
+            for i in 0..len {
+                DROP_COUNTS[i].store(0, Relaxed);
+            }
+
+            let v = $input.to_owned();
+            let _ = thread::spawn(move || {
+                let mut v = v;
+                let panic_countdown = AtomicUsize::new(panic_countdown);
+                v.$func(|a, b| {
+                    if panic_countdown.fetch_sub(1, Relaxed) == 1 {
+                        SILENCE_PANIC.with(|s| s.set(true));
+                        panic!();
+                    }
+                    a.cmp(b)
+                })
+            })
+            .join();
+
+            // Check that the number of things dropped is exactly
+            // what we expect (i.e. the contents of `v`).
+            for (i, c) in DROP_COUNTS.iter().enumerate().take(len) {
+                let count = c.load(Relaxed);
+                assert!(
+                    count == 1,
+                    "found drop count == {} for i == {}, len == {}",
+                    count,
+                    i,
+                    len
+                );
+            }
+
+            // Check that the most recent versions of values were dropped.
+            assert_eq!(VERSIONS.load(Relaxed), 0);
+
+            if panic_countdown < step {
+                break;
+            }
+            panic_countdown -= step;
+        }
+    };
+}
+
+thread_local!(static SILENCE_PANIC: Cell<bool> = Cell::new(false));
+
+#[test]
+fn sort_panic_safe() {
+    let prev = panic::take_hook();
+    panic::set_hook(Box::new(move |info| {
+        if !SILENCE_PANIC.with(Cell::get) {
+            prev(info);
+        }
+    }));
+
+    for &len in &[1, 2, 3, 4, 5, 10, 20, 100, 500, 5_000, 20_000] {
+        let len_dist = Uniform::new(0, len);
+        for &modulus in &[5, 30, 1_000, 20_000] {
+            for &has_runs in &[false, true] {
+                let mut rng = thread_rng();
+                let mut input = (0..len)
+                    .map(|id| DropCounter {
+                        x: rng.gen_range(0, modulus),
+                        id,
+                        version: Cell::new(0),
+                    })
+                    .collect::<Vec<_>>();
+
+                if has_runs {
+                    for c in &mut input {
+                        c.x = c.id as u32;
+                    }
+
+                    for _ in 0..5 {
+                        let a = rng.sample(&len_dist);
+                        let b = rng.sample(&len_dist);
+                        if a < b {
+                            input[a..b].reverse();
+                        } else {
+                            input.swap(a, b);
+                        }
+                    }
+                }
+
+                test!(input, par_sort_by);
+                test!(input, par_sort_unstable_by);
+            }
+        }
+    }
+}
diff --git a/tests/str.rs b/tests/str.rs
new file mode 100644
index 0000000..0e1e35e
--- /dev/null
+++ b/tests/str.rs
@@ -0,0 +1,116 @@
+use rand::distributions::Standard;
+use rand::{Rng, SeedableRng};
+use rand_xorshift::XorShiftRng;
+use rayon::prelude::*;
+
+fn seeded_rng() -> XorShiftRng {
+    let mut seed = <XorShiftRng as SeedableRng>::Seed::default();
+    (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i);
+    XorShiftRng::from_seed(seed)
+}
+
+#[test]
+pub fn execute_strings() {
+    let rng = seeded_rng();
+    let s: String = rng.sample_iter::<char, _>(&Standard).take(1024).collect();
+
+    let par_chars: String = s.par_chars().collect();
+    assert_eq!(s, par_chars);
+
+    let par_even: String = s.par_chars().filter(|&c| (c as u32) & 1 == 0).collect();
+    let ser_even: String = s.chars().filter(|&c| (c as u32) & 1 == 0).collect();
+    assert_eq!(par_even, ser_even);
+
+    // test `FromParallelIterator<&char> for String`
+    let vchars: Vec<char> = s.par_chars().collect();
+    let par_chars: String = vchars.par_iter().collect();
+    assert_eq!(s, par_chars);
+
+    let par_bytes: Vec<u8> = s.par_bytes().collect();
+    assert_eq!(s.as_bytes(), &*par_bytes);
+
+    let par_utf16: Vec<u16> = s.par_encode_utf16().collect();
+    let ser_utf16: Vec<u16> = s.encode_utf16().collect();
+    assert_eq!(par_utf16, ser_utf16);
+
+    let par_charind: Vec<_> = s.par_char_indices().collect();
+    let ser_charind: Vec<_> = s.char_indices().collect();
+    assert_eq!(par_charind, ser_charind);
+}
+
+#[test]
+pub fn execute_strings_split() {
+    // char testcases from examples in `str::split` etc.,
+    // plus a large self-test for good measure.
+    let tests = vec![
+        ("Mary had a little lamb", ' '),
+        ("", 'X'),
+        ("lionXXtigerXleopard", 'X'),
+        ("||||a||b|c", '|'),
+        ("(///)", '/'),
+        ("010", '0'),
+        ("    a  b c", ' '),
+        ("A.B.", '.'),
+        ("A..B..", '.'),
+        ("foo\r\nbar\n\nbaz\n", '\n'),
+        ("foo\nbar\n\r\nbaz", '\n'),
+        ("A few words", ' '),
+        (" Mary   had\ta\u{2009}little  \n\t lamb", ' '),
+        (include_str!("str.rs"), ' '),
+    ];
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split(separator).collect();
+        let parallel: Vec<_> = string.par_split(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.split(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_split(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split_terminator(separator).collect();
+        let parallel: Vec<_> = string.par_split_terminator(separator).collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.split_terminator(|c| c == separator).collect();
+        let parallel: Vec<_> = string.par_split_terminator(|c| c == separator).collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, _) in &tests {
+        let serial: Vec<_> = string.lines().collect();
+        let parallel: Vec<_> = string.par_lines().collect();
+        assert_eq!(serial, parallel);
+    }
+
+    for &(string, _) in &tests {
+        let serial: Vec<_> = string.split_whitespace().collect();
+        let parallel: Vec<_> = string.par_split_whitespace().collect();
+        assert_eq!(serial, parallel);
+    }
+
+    // try matching separators too!
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.matches(separator).collect();
+        let parallel: Vec<_> = string.par_matches(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.matches(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_matches(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+
+    for &(string, separator) in &tests {
+        let serial: Vec<_> = string.match_indices(separator).collect();
+        let parallel: Vec<_> = string.par_match_indices(separator).collect();
+        assert_eq!(serial, parallel);
+
+        let serial_fn: Vec<_> = string.match_indices(|c| c == separator).collect();
+        let parallel_fn: Vec<_> = string.par_match_indices(|c| c == separator).collect();
+        assert_eq!(serial_fn, parallel_fn);
+    }
+}
-- 
cgit v1.2.3