author     Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 01:05:15 +0000
committer  Android Build Coastguard Worker <android-build-coastguard-worker@google.com>  2023-07-07 01:05:15 +0000
commit     8da1b42f2c14d5e70c2d89abf11505d19bb60967 (patch)
tree       1900296c4c4cc8dbc1dcc832ee0e0fcbdcceb88b
parent     395101065c0196ac3bbf47bf265b9752ef68f744 (diff)
parent     a00441b1ee7ea275114309268c00e3b701b118f7 (diff)
download   rayon-android14-mainline-tethering-release.tar.gz
Change-Id: Ib87227ea30b3e0de5628f15000ff50ff8323c2c4
-rw-r--r--  .cargo_vcs_info.json  7
-rw-r--r--  Android.bp  15
-rw-r--r--  Cargo.toml  51
-rw-r--r--  Cargo.toml.orig  20
-rw-r--r--  METADATA  14
-rw-r--r--  README.md  38
-rw-r--r--  RELEASES.md  103
-rw-r--r--  TEST_MAPPING  3
-rw-r--r--  build.rs  9
-rw-r--r--  examples/README.md  3
-rw-r--r--  examples/cpu_monitor.rs  81
-rw-r--r--  src/array.rs  9
-rw-r--r--  src/collections/mod.rs  4
-rw-r--r--  src/compile_fail/cannot_collect_filtermap_data.rs  12
-rw-r--r--  src/compile_fail/cannot_zip_filtered_data.rs  10
-rw-r--r--  src/compile_fail/cell_par_iter.rs  10
-rw-r--r--  src/compile_fail/must_use.rs  2
-rw-r--r--  src/compile_fail/no_send_par_iter.rs  30
-rw-r--r--  src/compile_fail/rc_par_iter.rs  12
-rw-r--r--  src/delegate.rs  57
-rw-r--r--  src/iter/chunks.rs  50
-rw-r--r--  src/iter/collect/consumer.rs  113
-rw-r--r--  src/iter/collect/mod.rs  159
-rw-r--r--  src/iter/collect/test.rs  52
-rw-r--r--  src/iter/extend.rs  398
-rw-r--r--  src/iter/filter.rs  2
-rw-r--r--  src/iter/filter_map.rs  2
-rw-r--r--  src/iter/find.rs  2
-rw-r--r--  src/iter/fold_chunks.rs  236
-rw-r--r--  src/iter/fold_chunks_with.rs  231
-rw-r--r--  src/iter/inspect.rs  2
-rw-r--r--  src/iter/interleave.rs  18
-rw-r--r--  src/iter/len.rs  8
-rw-r--r--  src/iter/map.rs  2
-rw-r--r--  src/iter/mod.rs  435
-rw-r--r--  src/iter/par_bridge.rs  159
-rw-r--r--  src/iter/plumbing/README.md  6
-rw-r--r--  src/iter/skip.rs  13
-rw-r--r--  src/iter/skip_any.rs  144
-rw-r--r--  src/iter/skip_any_while.rs  166
-rw-r--r--  src/iter/step_by.rs  1
-rw-r--r--  src/iter/take_any.rs  144
-rw-r--r--  src/iter/take_any_while.rs  166
-rw-r--r--  src/iter/test.rs  24
-rw-r--r--  src/iter/try_fold.rs  68
-rw-r--r--  src/iter/try_reduce.rs  54
-rw-r--r--  src/iter/try_reduce_with.rs  60
-rw-r--r--  src/iter/update.rs  2
-rw-r--r--  src/lib.rs  43
-rw-r--r--  src/math.rs  2
-rw-r--r--  src/slice/chunks.rs  389
-rw-r--r--  src/slice/mergesort.rs  68
-rw-r--r--  src/slice/mod.rs  680
-rw-r--r--  src/slice/quicksort.rs  291
-rw-r--r--  src/slice/rchunks.rs  386
-rw-r--r--  src/slice/test.rs  24
-rw-r--r--  src/str.rs  142
-rw-r--r--  src/vec.rs  77
-rw-r--r--  tests/clones.rs  28
-rw-r--r--  tests/collect.rs  2
-rw-r--r--  tests/cross-pool.rs  1
-rw-r--r--  tests/debug.rs  10
-rw-r--r--  tests/drain_vec.rs  41
-rw-r--r--  tests/iter_panic.rs  3
-rw-r--r--  tests/named-threads.rs  1
-rw-r--r--  tests/octillion.rs  32
-rw-r--r--  tests/par_bridge_recursion.rs  31
-rw-r--r--  tests/producer_split_at.rs  44
-rw-r--r--  tests/sort-panic-safe.rs  10
-rw-r--r--  tests/str.rs  22
70 files changed, 4064 insertions, 1470 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index c8aa4e4..34dd103 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,6 @@
{
"git": {
- "sha1": "ebcb09b1dc53211c6b5abdf4dc5b40e4bcd0a965"
- }
-}
+ "sha1": "6236214d717694917e77aa1c16d91176b9bc2fff"
+ },
+ "path_in_vcs": ""
+}
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
index 1f36aa2..1a7835a 100644
--- a/Android.bp
+++ b/Android.bp
@@ -42,16 +42,17 @@ rust_library {
host_supported: true,
crate_name: "rayon",
cargo_env_compat: true,
- cargo_pkg_version: "1.5.1",
+ cargo_pkg_version: "1.7.0",
srcs: ["src/lib.rs"],
- edition: "2018",
- cfgs: [
- "min_const_generics",
- "step_by",
- ],
+ edition: "2021",
rustlibs: [
- "libcrossbeam_deque",
"libeither",
"librayon_core",
],
+ apex_available: [
+ "//apex_available:platform",
+ "//apex_available:anyapex",
+ ],
+ product_available: true,
+ vendor_available: true,
}
diff --git a/Cargo.toml b/Cargo.toml
index 3b8921d..9b0e7b6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,49 +3,50 @@
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies
+# to registry (e.g., crates.io) dependencies.
#
-# If you believe there's an error in this file please file an
-# issue against the rust-lang/cargo repository. If you're
-# editing this file be aware that the upstream Cargo.toml
-# will likely look very different (and much more reasonable)
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
[package]
-edition = "2018"
+edition = "2021"
+rust-version = "1.59"
name = "rayon"
-version = "1.5.1"
-authors = ["Niko Matsakis <niko@alum.mit.edu>", "Josh Stone <cuviper@gmail.com>"]
-exclude = ["/ci/*", "/scripts/*", "/.github/*", "/bors.toml"]
+version = "1.7.0"
+authors = [
+ "Niko Matsakis <niko@alum.mit.edu>",
+ "Josh Stone <cuviper@gmail.com>",
+]
+exclude = [
+ "/ci/*",
+ "/scripts/*",
+ "/.github/*",
+ "/bors.toml",
+]
description = "Simple work-stealing parallelism for Rust"
documentation = "https://docs.rs/rayon/"
readme = "README.md"
-keywords = ["parallel", "thread", "concurrency", "join", "performance"]
+keywords = [
+ "parallel",
+ "thread",
+ "concurrency",
+ "join",
+ "performance",
+]
categories = ["concurrency"]
-license = "Apache-2.0/MIT"
+license = "MIT OR Apache-2.0"
repository = "https://github.com/rayon-rs/rayon"
-[dependencies.crossbeam-deque]
-version = "0.8.0"
[dependencies.either]
version = "1.0"
default-features = false
[dependencies.rayon-core]
-version = "1.9.1"
-[dev-dependencies.docopt]
-version = "1"
-
-[dev-dependencies.lazy_static]
-version = "1"
+version = "1.11.0"
[dev-dependencies.rand]
version = "0.8"
[dev-dependencies.rand_xorshift]
version = "0.3"
-
-[dev-dependencies.serde]
-version = "1.0.85"
-features = ["derive"]
-[build-dependencies.autocfg]
-version = "1"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 6ab9dda..a6ccc97 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,12 +1,12 @@
[package]
name = "rayon"
-# Reminder to update html_rool_url in lib.rs when updating version
-version = "1.5.1"
+version = "1.7.0"
authors = ["Niko Matsakis <niko@alum.mit.edu>",
"Josh Stone <cuviper@gmail.com>"]
description = "Simple work-stealing parallelism for Rust"
-edition = "2018"
-license = "Apache-2.0/MIT"
+rust-version = "1.59"
+edition = "2021"
+license = "MIT OR Apache-2.0"
repository = "https://github.com/rayon-rs/rayon"
documentation = "https://docs.rs/rayon/"
readme = "README.md"
@@ -19,8 +19,7 @@ members = ["rayon-demo", "rayon-core"]
exclude = ["ci"]
[dependencies]
-rayon-core = { version = "1.9.1", path = "rayon-core" }
-crossbeam-deque = "0.8.0"
+rayon-core = { version = "1.11.0", path = "rayon-core" }
# This is a public dependency!
[dependencies.either]
@@ -28,14 +27,5 @@ version = "1.0"
default-features = false
[dev-dependencies]
-docopt = "1"
-lazy_static = "1"
rand = "0.8"
rand_xorshift = "0.3"
-
-[dev-dependencies.serde]
-version = "1.0.85"
-features = ["derive"]
-
-[build-dependencies]
-autocfg = "1"
diff --git a/METADATA b/METADATA
index ddf9f80..94b529f 100644
--- a/METADATA
+++ b/METADATA
@@ -1,3 +1,7 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update rust/crates/rayon
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+
name: "rayon"
description: "Simple work-stealing parallelism for Rust"
third_party {
@@ -7,13 +11,13 @@ third_party {
}
url {
type: ARCHIVE
- value: "https://static.crates.io/crates/rayon/rayon-1.5.1.crate"
+ value: "https://static.crates.io/crates/rayon/rayon-1.7.0.crate"
}
- version: "1.5.1"
+ version: "1.7.0"
license_type: NOTICE
last_upgrade_date {
- year: 2021
- month: 5
- day: 19
+ year: 2023
+ month: 4
+ day: 3
}
}
diff --git a/README.md b/README.md
index 4c4dff6..7f925bc 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
[![Rayon crate](https://img.shields.io/crates/v/rayon.svg)](https://crates.io/crates/rayon)
[![Rayon documentation](https://docs.rs/rayon/badge.svg)](https://docs.rs/rayon)
-![minimum rustc 1.36](https://img.shields.io/badge/rustc-1.36+-red.svg)
+![minimum rustc 1.59](https://img.shields.io/badge/rustc-1.59+-red.svg)
[![build status](https://github.com/rayon-rs/rayon/workflows/master/badge.svg)](https://github.com/rayon-rs/rayon/actions)
[![Join the chat at https://gitter.im/rayon-rs/Lobby](https://badges.gitter.im/rayon-rs/Lobby.svg)](https://gitter.im/rayon-rs/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
@@ -13,7 +13,7 @@ enjoy [this blog post][blog] about Rayon, which gives more background
and details about how it works, or [this video][video], from the Rust
Belt Rust conference.) Rayon is
[available on crates.io](https://crates.io/crates/rayon), and
-[API Documentation is available on docs.rs](https://docs.rs/rayon/).
+[API documentation is available on docs.rs](https://docs.rs/rayon).
[blog]: https://smallcultfollowing.com/babysteps/blog/2015/12/18/rayon-data-parallelism-in-rust/
[video]: https://www.youtube.com/watch?v=gof_OEv71Aw
@@ -71,12 +71,12 @@ as:
```toml
[dependencies]
-rayon = "1.5"
+rayon = "1.7"
```
-To use the Parallel Iterator APIs, a number of traits have to be in
+To use the parallel iterator APIs, a number of traits have to be in
scope. The easiest way to bring those things into scope is to use the
-[Rayon prelude](https://docs.rs/rayon/*/rayon/prelude/index.html). In
+[Rayon prelude](https://docs.rs/rayon/*/rayon/prelude/index.html). In
each module where you would like to use the parallel iterator APIs,
just add:
@@ -84,17 +84,37 @@ just add:
use rayon::prelude::*;
```
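
For a minimal sketch of what the prelude enables (standard `ParallelIterator` API), a sequential iterator chain becomes parallel by swapping `iter` for `par_iter`:

```rust
use rayon::prelude::*;

fn sum_of_squares(input: &[i32]) -> i32 {
    // `par_iter` splits the work across the thread pool; the rest of
    // the chain is identical to the sequential version.
    input.par_iter().map(|&i| i * i).sum()
}
```
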
-Rayon currently requires `rustc 1.36.0` or greater.
+Rayon currently requires `rustc 1.59.0` or greater.
+
+### Usage with WebAssembly
+
+Rayon can work on the Web via WebAssembly, but requires an adapter and
+some project configuration to account for differences between
+WebAssembly threads and threads on the other platforms.
+
+Check out the
+[wasm-bindgen-rayon](https://github.com/GoogleChromeLabs/wasm-bindgen-rayon)
+docs for more details.
## Contribution
-Rayon is an open source project! If you'd like to contribute to Rayon, check out [the list of "help wanted" issues](https://github.com/rayon-rs/rayon/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). These are all (or should be) issues that are suitable for getting started, and they generally include a detailed set of instructions for what to do. Please ask questions if anything is unclear! Also, check out the [Guide to Development](https://github.com/rayon-rs/rayon/wiki/Guide-to-Development) page on the wiki. Note that all code submitted in PRs to Rayon is assumed to [be licensed under Rayon's dual MIT/Apache2 licensing](https://github.com/rayon-rs/rayon/blob/master/README.md#license).
+Rayon is an open source project! If you'd like to contribute to Rayon,
+check out
+[the list of "help wanted" issues](https://github.com/rayon-rs/rayon/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22).
+These are all (or should be) issues that are suitable for getting
+started, and they generally include a detailed set of instructions for
+what to do. Please ask questions if anything is unclear! Also, check
+out the
+[Guide to Development](https://github.com/rayon-rs/rayon/wiki/Guide-to-Development)
+page on the wiki. Note that all code submitted in PRs to Rayon is
+assumed to
+[be licensed under Rayon's dual MIT/Apache 2.0 licensing](https://github.com/rayon-rs/rayon/blob/master/README.md#license).
## Quick demo
To see Rayon in action, check out the `rayon-demo` directory, which
includes a number of demos of code using Rayon. For example, run this
-command to get a visualization of an nbody simulation. To see the
+command to get a visualization of an N-body simulation. To see the
effect of using Rayon, press `s` to run sequentially and `p` to run in
parallel.
@@ -120,5 +140,5 @@ See [the Rayon FAQ][faq].
Rayon is distributed under the terms of both the MIT license and the
Apache License (Version 2.0). See [LICENSE-APACHE](LICENSE-APACHE) and
-[LICENSE-MIT](LICENSE-MIT) for details. Opening a pull requests is
+[LICENSE-MIT](LICENSE-MIT) for details. Opening a pull request is
assumed to signal agreement with these licensing terms.
diff --git a/RELEASES.md b/RELEASES.md
index 0299436..28b476d 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,3 +1,106 @@
+# Release rayon 1.7.0 / rayon-core 1.11.0 (2023-03-03)
+
+- The minimum supported `rustc` is now 1.59.
+- Added a fallback when threading is unsupported.
+- The new `ParallelIterator::take_any` and `skip_any` methods work like
+ unordered `IndexedParallelIterator::take` and `skip`, counting items in
+ whatever order they are visited in parallel.
+- The new `ParallelIterator::take_any_while` and `skip_any_while` methods work
+ like unordered `Iterator::take_while` and `skip_while`, which previously had
+ no parallel equivalent. The "while" condition may be satisfied from anywhere
+ in the parallel iterator, affecting all future items regardless of position.
+- The new `yield_now` and `yield_local` functions will cooperatively yield
+ execution to Rayon, either trying to execute pending work from the entire
+ pool or from just the local deques of the current thread, respectively.
+
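A minimal sketch of the new unordered methods (standard 1.7 API; results are deterministic in count but not in order):

```rust
use rayon::prelude::*;

fn main() {
    // `take_any` yields 5 items in whatever order threads reach them.
    let five: Vec<i32> = (0..1000).into_par_iter().take_any(5).collect();
    assert_eq!(five.len(), 5);

    // Once any item fails the predicate, `skip_any_while` stops skipping,
    // so every item >= 500 is guaranteed to be kept.
    let rest: Vec<i32> = (0..1000)
        .into_par_iter()
        .skip_any_while(|&x| x < 500)
        .collect();
    assert!(rest.len() >= 500);
}
```
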
+# Release rayon-core 1.10.2 (2023-01-22)
+
+- Fixed miri-reported UB for SharedReadOnly tags protected by a call.
+
+# Release rayon 1.6.1 (2022-12-09)
+
+- Simplified `par_bridge` to only pull one item at a time from the iterator,
+ without batching. Threads that are waiting for iterator items will now block
+ appropriately rather than spinning CPU. (Thanks @njaard!)
+- Added protection against recursion in `par_bridge`, so iterators that also
+ invoke rayon will not cause mutex recursion deadlocks.
+
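For illustration, a small sketch of `par_bridge` adapting an ordinary `Send` iterator:

```rust
use rayon::prelude::*;

fn main() {
    // As of 1.6.1, `par_bridge` pulls one item at a time from the
    // underlying iterator instead of batching.
    let lines = vec!["a", "bb", "ccc"];
    let total: usize = lines.into_iter().par_bridge().map(|s| s.len()).sum();
    assert_eq!(total, 6);
}
```
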
+# Release rayon-core 1.10.1 (2022-11-18)
+
+- Fixed a race condition with threads going to sleep while a broadcast starts.
+
+# Release rayon 1.6.0 / rayon-core 1.10.0 (2022-11-18)
+
+- The minimum supported `rustc` is now 1.56.
+- The new `IndexedParallelIterator::fold_chunks` and `fold_chunks_with` methods
+ work like `ParallelIterator::fold` and `fold_with` with fixed-size chunks of
+ items. This may be useful for predictable batching performance, without the
+ allocation overhead of `IndexedParallelIterator::chunks`.
+- New "broadcast" methods run a given function on all threads in the pool.
+ These run at a sort of reduced priority after each thread has exhausted its
+ local work queue, but before it attempts work-stealing from other threads.
+ - The global `broadcast` function and `ThreadPool::broadcast` method will
+ block until completion, returning a `Vec` of all return values.
+ - The global `spawn_broadcast` function and methods on `ThreadPool`, `Scope`,
+ and `ScopeFifo` will run detached, without blocking the current thread.
+- Panicking methods now use `#[track_caller]` to report the caller's location.
+- Fixed a truncated length in `vec::Drain` when given an empty range.
+
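A short sketch of the chunked fold and the blocking broadcast from this release:

```rust
use rayon::prelude::*;

fn main() {
    // Fold each run of 3 consecutive items without allocating a Vec
    // per chunk: [0+1+2, 3+4+5, 6+7+8, 9].
    let sums: Vec<i32> = (0..10)
        .into_par_iter()
        .fold_chunks(3, || 0, |acc, x| acc + x)
        .collect();
    assert_eq!(sums, vec![3, 12, 21, 9]);

    // `broadcast` blocks until the closure has run once on every thread
    // in the pool, returning one result per thread.
    let ids: Vec<usize> = rayon::broadcast(|ctx| ctx.index());
    assert_eq!(ids.len(), rayon::current_num_threads());
}
```
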
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @cuviper
+- @idanmuze
+- @JoeyBF
+- @JustForFun88
+- @kianmeng
+- @kornelski
+- @ritchie46
+- @ryanrussell
+- @steffahn
+- @TheIronBorn
+- @willcrozi
+
+# Release rayon 1.5.3 (2022-05-13)
+
+- The new `ParallelSliceMut::par_sort_by_cached_key` is a stable sort that caches
+ the keys for each item -- a parallel version of `slice::sort_by_cached_key`.
+
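A brief sketch of the cached-key sort (the key closure runs once per element rather than once per comparison):

```rust
use rayon::prelude::*;

fn main() {
    let mut words = vec!["rayon", "is", "parallel", "by", "default"];
    // Each length key is computed once and cached; the stable sort then
    // compares cached keys, preserving the order of equal-length items.
    words.par_sort_by_cached_key(|s| s.len());
    assert_eq!(words, vec!["is", "by", "rayon", "default", "parallel"]);
}
```
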
+# Release rayon-core 1.9.3 (2022-05-13)
+
+- Fixed a use-after-free race in job notification.
+
+# Release rayon 1.5.2 / rayon-core 1.9.2 (2022-04-13)
+
+- The new `ParallelSlice::par_rchunks()` and `par_rchunks_exact()` iterate
+ slice chunks in reverse, aligned against the end of the slice if the
+ length is not a perfect multiple of the chunk size. The new
+ `ParallelSliceMut::par_rchunks_mut()` and `par_rchunks_exact_mut()` are the
+ same for mutable slices.
+- The `ParallelIterator::try_*` methods now support `std::ops::ControlFlow` and
+ `std::task::Poll` items, mirroring the unstable `Try` implementations in the
+ standard library.
+- The `ParallelString` pattern-based methods now support `&[char]` patterns,
+ which match when any character in that slice is found in the string.
+- A soft limit is now enforced on the number of threads allowed in a single
+ thread pool, respecting internal bit limits that already existed. The current
+ maximum is publicly available from the new function `max_num_threads()`.
+- Fixed several Stacked Borrow and provenance issues found by `cargo miri`.
+
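As a sketch of the reverse-chunk iterators (chunks are aligned against the end of the slice, so any short remainder comes from the front and is yielded last):

```rust
use rayon::prelude::*;

fn main() {
    let v = [1, 2, 3, 4, 5];
    let chunks: Vec<&[i32]> = v.par_rchunks(2).collect();
    assert_eq!(chunks, vec![&[4, 5][..], &[2, 3][..], &[1][..]]);
}
```
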
+## Contributors
+
+Thanks to all of the contributors for this release!
+
+- @atouchet
+- @bluss
+- @cuviper
+- @fzyzcjy
+- @nyanzebra
+- @paolobarbolini
+- @RReverser
+- @saethlin
+
# Release rayon 1.5.1 / rayon-core 1.9.1 (2021-05-18)
- The new `in_place_scope` and `in_place_scope_fifo` are variations of `scope`
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 3cbd48d..55384f0 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -5,6 +5,9 @@
"path": "external/rust/crates/base64"
},
{
+ "path": "external/rust/crates/hashbrown"
+ },
+ {
"path": "external/rust/crates/tinytemplate"
},
{
diff --git a/build.rs b/build.rs
deleted file mode 100644
index e52e326..0000000
--- a/build.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-fn main() {
- let ac = autocfg::new();
- if ac.probe_expression("(0..10).step_by(2).rev()") {
- autocfg::emit("step_by");
- }
- if ac.probe_expression("{ fn foo<const N: usize>() {} }") {
- autocfg::emit("min_const_generics");
- }
-}
diff --git a/examples/README.md b/examples/README.md
deleted file mode 100644
index ec08a4d..0000000
--- a/examples/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-We use this directory for interactive tests that can't be run in an
-automatic fashion. For examples of how to use Rayon, or benchmarks,
-see `rayon-demo`.
diff --git a/examples/cpu_monitor.rs b/examples/cpu_monitor.rs
deleted file mode 100644
index bbe1316..0000000
--- a/examples/cpu_monitor.rs
+++ /dev/null
@@ -1,81 +0,0 @@
-use docopt::Docopt;
-use std::io;
-use std::process;
-
-const USAGE: &str = "
-Usage: cpu_monitor [options] <scenario>
- cpu_monitor --help
-
-A test for monitoring how much CPU usage Rayon consumes under various
-scenarios. This test is intended to be executed interactively, like so:
-
- cargo run --example cpu_monitor -- tasks_ended
-
-The list of scenarios you can try are as follows:
-
-- tasks_ended: after all tasks have finished, go to sleep
-- task_stall_root: a root task stalls for a very long time
-- task_stall_scope: a task in a scope stalls for a very long time
-
-Options:
- -h, --help Show this message.
- -d N, --depth N Control how hard the dummy task works [default: 27]
-";
-
-#[derive(serde::Deserialize)]
-pub struct Args {
- arg_scenario: String,
- flag_depth: usize,
-}
-
-fn main() {
- let args: &Args = &Docopt::new(USAGE)
- .and_then(|d| d.deserialize())
- .unwrap_or_else(|e| e.exit());
-
- match &args.arg_scenario[..] {
- "tasks_ended" => tasks_ended(args),
- "task_stall_root" => task_stall_root(args),
- "task_stall_scope" => task_stall_scope(args),
- _ => {
- println!("unknown scenario: `{}`", args.arg_scenario);
- println!("try --help");
- process::exit(1);
- }
- }
-}
-
-fn wait_for_user() {
- let mut input = String::new();
- io::stdin().read_line(&mut input).unwrap();
-}
-
-fn task(args: &Args) {
- fn join_recursively(n: usize) {
- if n == 0 {
- return;
- }
- rayon::join(|| join_recursively(n - 1), || join_recursively(n - 1));
- }
-
- println!("Starting heavy work at depth {}...wait.", args.flag_depth);
- join_recursively(args.flag_depth);
- println!("Heavy work done; check top. You should see CPU usage drop to zero soon.");
- println!("Press <enter> to quit...");
-}
-
-fn tasks_ended(args: &Args) {
- task(args);
- wait_for_user();
-}
-
-fn task_stall_root(args: &Args) {
- rayon::join(|| task(args), wait_for_user);
-}
-
-fn task_stall_scope(args: &Args) {
- rayon::scope(|scope| {
- scope.spawn(move |_| task(args));
- scope.spawn(move |_| wait_for_user());
- });
-}
diff --git a/src/array.rs b/src/array.rs
index 7922cd6..32a5fdd 100644
--- a/src/array.rs
+++ b/src/array.rs
@@ -1,11 +1,8 @@
-#![cfg(min_const_generics)]
//! Parallel iterator types for [arrays] (`[T; N]`)
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
-//! Everything in this module requires const generics, stabilized in Rust 1.51.
-//!
//! [arrays]: https://doc.rust-lang.org/std/primitive.array.html
use crate::iter::plumbing::*;
@@ -14,7 +11,6 @@ use crate::slice::{Iter, IterMut};
use crate::vec::DrainProducer;
use std::mem::ManuallyDrop;
-/// This implementation requires const generics, stabilized in Rust 1.51.
impl<'data, T: Sync + 'data, const N: usize> IntoParallelIterator for &'data [T; N] {
type Item = &'data T;
type Iter = Iter<'data, T>;
@@ -24,7 +20,6 @@ impl<'data, T: Sync + 'data, const N: usize> IntoParallelIterator for &'data [T;
}
}
-/// This implementation requires const generics, stabilized in Rust 1.51.
impl<'data, T: Send + 'data, const N: usize> IntoParallelIterator for &'data mut [T; N] {
type Item = &'data mut T;
type Iter = IterMut<'data, T>;
@@ -34,7 +29,6 @@ impl<'data, T: Send + 'data, const N: usize> IntoParallelIterator for &'data mut
}
}
-/// This implementation requires const generics, stabilized in Rust 1.51.
impl<T: Send, const N: usize> IntoParallelIterator for [T; N] {
type Item = T;
type Iter = IntoIter<T, N>;
@@ -84,7 +78,8 @@ impl<T: Send, const N: usize> IndexedParallelIterator for IntoIter<T, N> {
unsafe {
// Drain every item, and then the local array can just fall out of scope.
let mut array = ManuallyDrop::new(self.array);
- callback.callback(DrainProducer::new(&mut *array))
+ let producer = DrainProducer::new(array.as_mut_slice());
+ callback.callback(producer)
}
}
}
diff --git a/src/collections/mod.rs b/src/collections/mod.rs
index d9b7988..3c99f32 100644
--- a/src/collections/mod.rs
+++ b/src/collections/mod.rs
@@ -56,7 +56,7 @@ mod drain_guard {
pub(super) fn new(collection: &'a mut C) -> Self {
Self {
// Temporarily steal the inner `Vec` so we can drain in place.
- vec: Vec::from(mem::replace(collection, C::default())),
+ vec: Vec::from(mem::take(collection)),
collection,
}
}
@@ -65,7 +65,7 @@ mod drain_guard {
impl<'a, T, C: From<Vec<T>>> Drop for DrainGuard<'a, T, C> {
fn drop(&mut self) {
// Restore the collection from the `Vec` with its original capacity.
- *self.collection = C::from(mem::replace(&mut self.vec, Vec::new()));
+ *self.collection = C::from(mem::take(&mut self.vec));
}
}
diff --git a/src/compile_fail/cannot_collect_filtermap_data.rs b/src/compile_fail/cannot_collect_filtermap_data.rs
index 65ff875..bb62ce0 100644
--- a/src/compile_fail/cannot_collect_filtermap_data.rs
+++ b/src/compile_fail/cannot_collect_filtermap_data.rs
@@ -5,12 +5,10 @@ use rayon::prelude::*;
// zip requires data of exact size, but filter yields only bounded
// size, so check that we cannot apply it.
-fn main() {
- let a: Vec<usize> = (0..1024).collect();
- let mut v = vec![];
- a.par_iter()
- .filter_map(|&x| Some(x as f32))
- .collect_into_vec(&mut v); //~ ERROR no method
-}
+let a: Vec<usize> = (0..1024).collect();
+let mut v = vec![];
+a.par_iter()
+ .filter_map(|&x| Some(x as f32))
+ .collect_into_vec(&mut v); //~ ERROR no method
``` */
diff --git a/src/compile_fail/cannot_zip_filtered_data.rs b/src/compile_fail/cannot_zip_filtered_data.rs
index de43fca..54fd50d 100644
--- a/src/compile_fail/cannot_zip_filtered_data.rs
+++ b/src/compile_fail/cannot_zip_filtered_data.rs
@@ -5,12 +5,10 @@ use rayon::prelude::*;
// zip requires data of exact size, but filter yields only bounded
// size, so check that we cannot apply it.
-fn main() {
- let mut a: Vec<usize> = (0..1024).rev().collect();
- let b: Vec<usize> = (0..1024).collect();
+let mut a: Vec<usize> = (0..1024).rev().collect();
+let b: Vec<usize> = (0..1024).collect();
- a.par_iter()
- .zip(b.par_iter().filter(|&&x| x > 3)); //~ ERROR
-}
+a.par_iter()
+ .zip(b.par_iter().filter(|&&x| x > 3)); //~ ERROR
``` */
diff --git a/src/compile_fail/cell_par_iter.rs b/src/compile_fail/cell_par_iter.rs
index 4af04b1..98a1cf9 100644
--- a/src/compile_fail/cell_par_iter.rs
+++ b/src/compile_fail/cell_par_iter.rs
@@ -5,11 +5,9 @@
use rayon::prelude::*;
use std::cell::Cell;
-fn main() {
- let c = Cell::new(42_i32);
- (0_i32..1024).into_par_iter()
- .map(|_| c.get()) //~ ERROR E0277
- .min();
-}
+let c = Cell::new(42_i32);
+(0_i32..1024).into_par_iter()
+ .map(|_| c.get()) //~ ERROR E0277
+ .min();
``` */
diff --git a/src/compile_fail/must_use.rs b/src/compile_fail/must_use.rs
index ac50a62..b4b3d77 100644
--- a/src/compile_fail/must_use.rs
+++ b/src/compile_fail/must_use.rs
@@ -33,6 +33,8 @@ must_use! {
step_by /** v.par_iter().step_by(2); */
chain /** v.par_iter().chain(&v); */
chunks /** v.par_iter().chunks(2); */
+ fold_chunks /** v.par_iter().fold_chunks(2, || 0, |x, _| x); */
+ fold_chunks_with /** v.par_iter().fold_chunks_with(2, 0, |x, _| x); */
cloned /** v.par_iter().cloned(); */
copied /** v.par_iter().copied(); */
enumerate /** v.par_iter().enumerate(); */
diff --git a/src/compile_fail/no_send_par_iter.rs b/src/compile_fail/no_send_par_iter.rs
index 1362c98..7573c03 100644
--- a/src/compile_fail/no_send_par_iter.rs
+++ b/src/compile_fail/no_send_par_iter.rs
@@ -10,13 +10,11 @@ struct NoSend(*const ());
unsafe impl Sync for NoSend {}
-fn main() {
- let x = Some(NoSend(null()));
+let x = Some(NoSend(null()));
- x.par_iter()
- .map(|&x| x) //~ ERROR
- .count(); //~ ERROR
-}
+x.par_iter()
+ .map(|&x| x) //~ ERROR
+ .count(); //~ ERROR
``` */
mod map {}
@@ -31,13 +29,11 @@ struct NoSend(*const ());
unsafe impl Sync for NoSend {}
-fn main() {
- let x = Some(NoSend(null()));
+let x = Some(NoSend(null()));
- x.par_iter()
- .filter_map(|&x| Some(x)) //~ ERROR
- .count(); //~ ERROR
-}
+x.par_iter()
+ .filter_map(|&x| Some(x)) //~ ERROR
+ .count(); //~ ERROR
``` */
mod filter_map {}
@@ -52,13 +48,11 @@ struct NoSend(*const ());
unsafe impl Sync for NoSend {}
-fn main() {
- let x = Some(NoSend(null()));
+let x = Some(NoSend(null()));
- x.par_iter()
- .cloned() //~ ERROR
- .count(); //~ ERROR
-}
+x.par_iter()
+ .cloned() //~ ERROR
+ .count(); //~ ERROR
``` */
mod cloned {}
diff --git a/src/compile_fail/rc_par_iter.rs b/src/compile_fail/rc_par_iter.rs
index feaedb3..aca63e7 100644
--- a/src/compile_fail/rc_par_iter.rs
+++ b/src/compile_fail/rc_par_iter.rs
@@ -6,12 +6,10 @@
use rayon::prelude::*;
use std::rc::Rc;
-fn main() {
- let x = vec![Rc::new(22), Rc::new(23)];
- let mut y = vec![];
- x.into_par_iter() //~ ERROR no method named `into_par_iter`
- .map(|rc| *rc)
- .collect_into_vec(&mut y);
-}
+let x = vec![Rc::new(22), Rc::new(23)];
+let mut y = vec![];
+x.into_par_iter() //~ ERROR no method named `into_par_iter`
+ .map(|rc| *rc)
+ .collect_into_vec(&mut y);
``` */
diff --git a/src/delegate.rs b/src/delegate.rs
index a537489..2a6e331 100644
--- a/src/delegate.rs
+++ b/src/delegate.rs
@@ -8,15 +8,6 @@
/// declared with an `inner` field.
///
/// The implementation of `IntoParallelIterator` should be added separately.
-///
-/// # Example
-///
-/// ```
-/// delegate_iterator!{
-/// MyIntoIter<T, U> => (T, U),
-/// impl<T: Ord + Send, U: Send>
-/// }
-/// ```
macro_rules! delegate_iterator {
($iter:ty => $item:ty ,
impl $( $args:tt )*
@@ -68,3 +59,51 @@ macro_rules! delegate_indexed_iterator {
}
}
}
+
+#[test]
+fn unindexed_example() {
+ use crate::collections::btree_map::IntoIter;
+ use crate::iter::plumbing::*;
+ use crate::prelude::*;
+
+ use std::collections::BTreeMap;
+
+ struct MyIntoIter<T: Ord + Send, U: Send> {
+ inner: IntoIter<T, U>,
+ }
+
+ delegate_iterator! {
+ MyIntoIter<T, U> => (T, U),
+ impl<T: Ord + Send, U: Send>
+ }
+
+ let map = BTreeMap::from([(1, 'a'), (2, 'b'), (3, 'c')]);
+ let iter = MyIntoIter {
+ inner: map.into_par_iter(),
+ };
+ let vec: Vec<_> = iter.map(|(k, _)| k).collect();
+ assert_eq!(vec, &[1, 2, 3]);
+}
+
+#[test]
+fn indexed_example() {
+ use crate::iter::plumbing::*;
+ use crate::prelude::*;
+ use crate::vec::IntoIter;
+
+ struct MyIntoIter<T: Send> {
+ inner: IntoIter<T>,
+ }
+
+ delegate_indexed_iterator! {
+ MyIntoIter<T> => T,
+ impl<T: Send>
+ }
+
+ let iter = MyIntoIter {
+ inner: vec![1, 2, 3].into_par_iter(),
+ };
+ let mut vec = vec![];
+ iter.collect_into_vec(&mut vec);
+ assert_eq!(vec, &[1, 2, 3]);
+}
diff --git a/src/iter/chunks.rs b/src/iter/chunks.rs
index be5f84c..ec48278 100644
--- a/src/iter/chunks.rs
+++ b/src/iter/chunks.rs
@@ -90,38 +90,46 @@ where
where
P: Producer<Item = T>,
{
- self.callback.callback(ChunkProducer {
- chunk_size: self.size,
- len: self.len,
- base,
- })
+ let producer = ChunkProducer::new(self.size, self.len, base, Vec::from_iter);
+ self.callback.callback(producer)
}
}
}
}
-struct ChunkProducer<P>
-where
- P: Producer,
-{
+pub(super) struct ChunkProducer<P, F> {
chunk_size: usize,
len: usize,
base: P,
+ map: F,
}
-impl<P> Producer for ChunkProducer<P>
+impl<P, F> ChunkProducer<P, F> {
+ pub(super) fn new(chunk_size: usize, len: usize, base: P, map: F) -> Self {
+ Self {
+ chunk_size,
+ len,
+ base,
+ map,
+ }
+ }
+}
+
+impl<P, F, T> Producer for ChunkProducer<P, F>
where
P: Producer,
+ F: Fn(P::IntoIter) -> T + Send + Clone,
{
- type Item = Vec<P::Item>;
- type IntoIter = ChunkSeq<P>;
+ type Item = T;
+ type IntoIter = std::iter::Map<ChunkSeq<P>, F>;
fn into_iter(self) -> Self::IntoIter {
- ChunkSeq {
+ let chunks = ChunkSeq {
chunk_size: self.chunk_size,
len: self.len,
inner: if self.len > 0 { Some(self.base) } else { None },
- }
+ };
+ chunks.map(self.map)
}
fn split_at(self, index: usize) -> (Self, Self) {
@@ -132,11 +140,13 @@ where
chunk_size: self.chunk_size,
len: elem_index,
base: left,
+ map: self.map.clone(),
},
ChunkProducer {
chunk_size: self.chunk_size,
len: self.len - elem_index,
base: right,
+ map: self.map,
},
)
}
@@ -150,7 +160,7 @@ where
}
}
-struct ChunkSeq<P> {
+pub(super) struct ChunkSeq<P> {
chunk_size: usize,
len: usize,
inner: Option<P>,
@@ -160,7 +170,7 @@ impl<P> Iterator for ChunkSeq<P>
where
P: Producer,
{
- type Item = Vec<P::Item>;
+ type Item = P::IntoIter;
fn next(&mut self) -> Option<Self::Item> {
let producer = self.inner.take()?;
@@ -168,11 +178,11 @@ where
let (left, right) = producer.split_at(self.chunk_size);
self.inner = Some(right);
self.len -= self.chunk_size;
- Some(left.into_iter().collect())
+ Some(left.into_iter())
} else {
debug_assert!(self.len > 0);
self.len = 0;
- Some(producer.into_iter().collect())
+ Some(producer.into_iter())
}
}
@@ -206,11 +216,11 @@ where
let (left, right) = producer.split_at(self.len - size);
self.inner = Some(left);
self.len -= size;
- Some(right.into_iter().collect())
+ Some(right.into_iter())
} else {
debug_assert!(self.len > 0);
self.len = 0;
- Some(producer.into_iter().collect())
+ Some(producer.into_iter())
}
}
}
diff --git a/src/iter/collect/consumer.rs b/src/iter/collect/consumer.rs
index 3a8eea0..acd67df 100644
--- a/src/iter/collect/consumer.rs
+++ b/src/iter/collect/consumer.rs
@@ -1,19 +1,38 @@
use super::super::plumbing::*;
+use crate::SendPtr;
use std::marker::PhantomData;
-use std::mem::MaybeUninit;
use std::ptr;
use std::slice;
pub(super) struct CollectConsumer<'c, T: Send> {
- /// A slice covering the target memory, not yet initialized!
- target: &'c mut [MaybeUninit<T>],
+ /// See `CollectResult` for explanation of why this is not a slice
+ start: SendPtr<T>,
+ len: usize,
+ marker: PhantomData<&'c mut T>,
+}
+
+impl<T: Send> CollectConsumer<'_, T> {
+ /// Create a collector for `len` items in the unused capacity of the vector.
+ pub(super) fn appender(vec: &mut Vec<T>, len: usize) -> CollectConsumer<'_, T> {
+ let start = vec.len();
+ assert!(vec.capacity() - start >= len);
+
+ // SAFETY: We already made sure to have the additional space allocated.
+ // The pointer is derived from `Vec` directly, not through a `Deref`,
+ // so it has provenance over the whole allocation.
+ unsafe { CollectConsumer::new(vec.as_mut_ptr().add(start), len) }
+ }
}
impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
/// The target memory is considered uninitialized, and will be
/// overwritten without reading or dropping existing values.
- pub(super) fn new(target: &'c mut [MaybeUninit<T>]) -> Self {
- CollectConsumer { target }
+ unsafe fn new(start: *mut T, len: usize) -> Self {
+ CollectConsumer {
+ start: SendPtr(start),
+ len,
+ marker: PhantomData,
+ }
}
}
@@ -23,10 +42,13 @@ impl<'c, T: Send + 'c> CollectConsumer<'c, T> {
/// the elements will be dropped, unless its ownership is released before then.
#[must_use]
pub(super) struct CollectResult<'c, T> {
- /// A slice covering the target memory, initialized up to our separate `len`.
- target: &'c mut [MaybeUninit<T>],
- /// The current initialized length in `target`
- len: usize,
+ /// This pointer and length has the same representation as a slice,
+ /// but retains the provenance of the entire array so that we can merge
+ /// these regions together in `CollectReducer`.
+ start: SendPtr<T>,
+ total_len: usize,
+ /// The current initialized length after `start`
+ initialized_len: usize,
/// Lifetime invariance guarantees that the data flows from consumer to result,
/// especially for the `scope_fn` callback in `Collect::with_consumer`.
invariant_lifetime: PhantomData<&'c mut &'c mut [T]>,
@@ -37,25 +59,26 @@ unsafe impl<'c, T> Send for CollectResult<'c, T> where T: Send {}
impl<'c, T> CollectResult<'c, T> {
/// The current length of the collect result
pub(super) fn len(&self) -> usize {
- self.len
+ self.initialized_len
}
/// Release ownership of the slice of elements, and return the length
pub(super) fn release_ownership(mut self) -> usize {
- let ret = self.len;
- self.len = 0;
+ let ret = self.initialized_len;
+ self.initialized_len = 0;
ret
}
}
impl<'c, T> Drop for CollectResult<'c, T> {
fn drop(&mut self) {
- // Drop the first `self.len` elements, which have been recorded
+ // Drop the first `self.initialized_len` elements, which have been recorded
// to be initialized by the folder.
unsafe {
- // TODO: use `MaybeUninit::slice_as_mut_ptr`
- let start = self.target.as_mut_ptr() as *mut T;
- ptr::drop_in_place(slice::from_raw_parts_mut(start, self.len));
+ ptr::drop_in_place(slice::from_raw_parts_mut(
+ self.start.0,
+ self.initialized_len,
+ ));
}
}
}
@@ -66,24 +89,27 @@ impl<'c, T: Send + 'c> Consumer<T> for CollectConsumer<'c, T> {
type Result = CollectResult<'c, T>;
fn split_at(self, index: usize) -> (Self, Self, CollectReducer) {
- let CollectConsumer { target } = self;
-
- // Produce new consumers. Normal slicing ensures that the
- // memory range given to each consumer is disjoint.
- let (left, right) = target.split_at_mut(index);
- (
- CollectConsumer::new(left),
- CollectConsumer::new(right),
- CollectReducer,
- )
+ let CollectConsumer { start, len, .. } = self;
+
+ // Produce new consumers.
+ // SAFETY: This assert checks that `index` is a valid offset for `start`
+ unsafe {
+ assert!(index <= len);
+ (
+ CollectConsumer::new(start.0, index),
+ CollectConsumer::new(start.0.add(index), len - index),
+ CollectReducer,
+ )
+ }
}
fn into_folder(self) -> Self::Folder {
// Create a result/folder that consumes values and writes them
- // into target. The initial result has length 0.
+ // into the region after start. The initial result has length 0.
CollectResult {
- target: self.target,
- len: 0,
+ start: self.start,
+ total_len: self.len,
+ initialized_len: 0,
invariant_lifetime: PhantomData,
}
}
@@ -97,15 +123,17 @@ impl<'c, T: Send + 'c> Folder<T> for CollectResult<'c, T> {
type Result = Self;
fn consume(mut self, item: T) -> Self {
- let dest = self
- .target
- .get_mut(self.len)
- .expect("too many values pushed to consumer");
+ assert!(
+ self.initialized_len < self.total_len,
+ "too many values pushed to consumer"
+ );
- // Write item and increase the initialized length
+ // SAFETY: The assert above is a bounds check for this write, and we
+ // avoid assignment here so we do not drop an uninitialized T.
unsafe {
- dest.as_mut_ptr().write(item);
- self.len += 1;
+ // Write item and increase the initialized length
+ self.start.0.add(self.initialized_len).write(item);
+ self.initialized_len += 1;
}
self
@@ -146,14 +174,13 @@ impl<'c, T> Reducer<CollectResult<'c, T>> for CollectReducer {
// Merge if the CollectResults are adjacent and in left to right order
// else: drop the right piece now and total length will end up short in the end,
// when the correctness of the collected result is asserted.
- let left_end = left.target[left.len..].as_ptr();
- if left_end == right.target.as_ptr() {
- let len = left.len + right.release_ownership();
- unsafe {
- left.target = slice::from_raw_parts_mut(left.target.as_mut_ptr(), len);
+ unsafe {
+ let left_end = left.start.0.add(left.initialized_len);
+ if left_end == right.start.0 {
+ left.total_len += right.total_len;
+ left.initialized_len += right.release_ownership();
}
- left.len = len;
+ left
}
- left
}
}
diff --git a/src/iter/collect/mod.rs b/src/iter/collect/mod.rs
index 7cbf215..4044a68 100644
--- a/src/iter/collect/mod.rs
+++ b/src/iter/collect/mod.rs
@@ -1,6 +1,4 @@
-use super::{IndexedParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator};
-use std::mem::MaybeUninit;
-use std::slice;
+use super::{IndexedParallelIterator, ParallelIterator};
mod consumer;
use self::consumer::CollectConsumer;
@@ -19,7 +17,7 @@ where
{
v.truncate(0); // clear any old data
let len = pi.len();
- Collect::new(v, len).with_consumer(|consumer| pi.drive(consumer));
+ collect_with_consumer(v, len, |consumer| pi.drive(consumer));
}
/// Collects the results of the iterator into the specified vector.
@@ -33,12 +31,12 @@ where
/// *any* `ParallelIterator` here, and `CollectConsumer` has to also implement
/// `UnindexedConsumer`. That implementation panics `unreachable!` in case
/// there's a bug where we actually do try to use this unindexed.
-fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
+pub(super) fn special_extend<I, T>(pi: I, len: usize, v: &mut Vec<T>)
where
I: ParallelIterator<Item = T>,
T: Send,
{
- Collect::new(v, len).with_consumer(|consumer| pi.drive_unindexed(consumer));
+ collect_with_consumer(v, len, |consumer| pi.drive_unindexed(consumer));
}
/// Unzips the results of the exact iterator into the specified vectors.
@@ -55,9 +53,9 @@ where
right.truncate(0);
let len = pi.len();
- Collect::new(right, len).with_consumer(|right_consumer| {
+ collect_with_consumer(right, len, |right_consumer| {
let mut right_result = None;
- Collect::new(left, len).with_consumer(|left_consumer| {
+ collect_with_consumer(left, len, |left_consumer| {
let (left_r, right_r) = unzip_indexed(pi, left_consumer, right_consumer);
right_result = Some(right_r);
left_r
@@ -66,108 +64,53 @@ where
});
}
-/// Manage the collection vector.
-struct Collect<'c, T: Send> {
- vec: &'c mut Vec<T>,
- len: usize,
-}
-
-impl<'c, T: Send + 'c> Collect<'c, T> {
- fn new(vec: &'c mut Vec<T>, len: usize) -> Self {
- Collect { vec, len }
- }
-
- /// Create a consumer on the slice of memory we are collecting into.
- ///
- /// The consumer needs to be used inside the scope function, and the
- /// complete collect result passed back.
- ///
- /// This method will verify the collect result, and panic if the slice
- /// was not fully written into. Otherwise, in the successful case,
- /// the vector is complete with the collected result.
- fn with_consumer<F>(mut self, scope_fn: F)
- where
- F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
- {
- let slice = Self::reserve_get_tail_slice(&mut self.vec, self.len);
- let result = scope_fn(CollectConsumer::new(slice));
-
- // The CollectResult represents a contiguous part of the
- // slice, that has been written to.
- // On unwind here, the CollectResult will be dropped.
- // If some producers on the way did not produce enough elements,
- // partial CollectResults may have been dropped without
- // being reduced to the final result, and we will see
- // that as the length coming up short.
- //
- // Here, we assert that `slice` is fully initialized. This is
- // checked by the following assert, which verifies if a
- // complete CollectResult was produced; if the length is
- // correct, it is necessarily covering the target slice.
- // Since we know that the consumer cannot have escaped from
- // `drive` (by parametricity, essentially), we know that any
- // stores that will happen, have happened. Unless some code is buggy,
- // that means we should have seen `len` total writes.
- let actual_writes = result.len();
- assert!(
- actual_writes == self.len,
- "expected {} total writes, but got {}",
- self.len,
- actual_writes
- );
-
- // Release the result's mutable borrow and "proxy ownership"
- // of the elements, before the vector takes it over.
- result.release_ownership();
-
- let new_len = self.vec.len() + self.len;
-
- unsafe {
- self.vec.set_len(new_len);
- }
- }
-
- /// Reserve space for `len` more elements in the vector,
- /// and return a slice to the uninitialized tail of the vector
- fn reserve_get_tail_slice(vec: &mut Vec<T>, len: usize) -> &mut [MaybeUninit<T>] {
- // Reserve the new space.
- vec.reserve(len);
-
- // TODO: use `Vec::spare_capacity_mut` instead
- // SAFETY: `MaybeUninit<T>` is guaranteed to have the same layout
- // as `T`, and we already made sure to have the additional space.
- let start = vec.len();
- let tail_ptr = vec[start..].as_mut_ptr() as *mut MaybeUninit<T>;
- unsafe { slice::from_raw_parts_mut(tail_ptr, len) }
- }
-}
-
-/// Extends a vector with items from a parallel iterator.
-impl<T> ParallelExtend<T> for Vec<T>
+/// Create a consumer on the slice of memory we are collecting into.
+///
+/// The consumer needs to be used inside the scope function, and the
+/// complete collect result passed back.
+///
+/// This method will verify the collect result, and panic if the slice
+/// was not fully written into. Otherwise, in the successful case,
+/// the vector is complete with the collected result.
+fn collect_with_consumer<T, F>(vec: &mut Vec<T>, len: usize, scope_fn: F)
where
T: Send,
+ F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>,
{
- fn par_extend<I>(&mut self, par_iter: I)
- where
- I: IntoParallelIterator<Item = T>,
- {
- // See the vec_collect benchmarks in rayon-demo for different strategies.
- let par_iter = par_iter.into_par_iter();
- match par_iter.opt_len() {
- Some(len) => {
- // When Rust gets specialization, we can get here for indexed iterators
- // without relying on `opt_len`. Until then, `special_extend()` fakes
- // an unindexed mode on the promise that `opt_len()` is accurate.
- special_extend(par_iter, len, self);
- }
- None => {
- // This works like `extend`, but `Vec::append` is more efficient.
- let list = super::extend::collect(par_iter);
- self.reserve(super::extend::len(&list));
- for mut vec in list {
- self.append(&mut vec);
- }
- }
- }
+ // Reserve space for `len` more elements in the vector,
+ vec.reserve(len);
+
+ // Create the consumer and run the callback for collection.
+ let result = scope_fn(CollectConsumer::appender(vec, len));
+
+ // The `CollectResult` represents a contiguous part of the slice, that has
+ // been written to. On unwind here, the `CollectResult` will be dropped. If
+ // some producers on the way did not produce enough elements, partial
+ // `CollectResult`s may have been dropped without being reduced to the final
+ // result, and we will see that as the length coming up short.
+ //
+ // Here, we assert that added length is fully initialized. This is checked
+ // by the following assert, which verifies if a complete `CollectResult`
+ // was produced; if the length is correct, it is necessarily covering the
+ // target slice. Since we know that the consumer cannot have escaped from
+ // `drive` (by parametricity, essentially), we know that any stores that
+ // will happen, have happened. Unless some code is buggy, that means we
+ // should have seen `len` total writes.
+ let actual_writes = result.len();
+ assert!(
+ actual_writes == len,
+ "expected {} total writes, but got {}",
+ len,
+ actual_writes
+ );
+
+ // Release the result's mutable borrow and "proxy ownership"
+ // of the elements, before the vector takes it over.
+ result.release_ownership();
+
+ let new_len = vec.len() + len;
+
+ unsafe {
+ vec.set_len(new_len);
}
}
diff --git a/src/iter/collect/test.rs b/src/iter/collect/test.rs
index ddf7757..97bec3f 100644
--- a/src/iter/collect/test.rs
+++ b/src/iter/collect/test.rs
@@ -5,7 +5,7 @@
// try to drive the "collect consumer" incorrectly. These should
// result in panics.
-use super::Collect;
+use super::collect_with_consumer;
use crate::iter::plumbing::*;
use rayon_core::join;
@@ -20,7 +20,7 @@ use std::thread::Result as ThreadResult;
#[should_panic(expected = "too many values")]
fn produce_too_many_items() {
let mut v = vec![];
- Collect::new(&mut v, 2).with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 2, |consumer| {
let mut folder = consumer.into_folder();
folder = folder.consume(22);
folder = folder.consume(23);
@@ -35,8 +35,7 @@ fn produce_too_many_items() {
#[should_panic(expected = "expected 5 total writes, but got 2")]
fn produce_fewer_items() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 5);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 5, |consumer| {
let mut folder = consumer.into_folder();
folder = folder.consume(22);
folder = folder.consume(23);
@@ -49,8 +48,7 @@ fn produce_fewer_items() {
#[should_panic(expected = "expected 4 total writes, but got 2")]
fn left_produces_items_with_no_complete() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -66,8 +64,7 @@ fn left_produces_items_with_no_complete() {
#[should_panic(expected = "expected 4 total writes, but got 2")]
fn right_produces_items_with_no_complete() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -79,12 +76,12 @@ fn right_produces_items_with_no_complete() {
// Complete is not called by the consumer. Hence, the collection vector is not fully initialized.
#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
fn produces_items_with_no_complete() {
let counter = DropCounter::default();
let mut v = vec![];
let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
- let collect = Collect::new(&mut v, 2);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 2, |consumer| {
let mut folder = consumer.into_folder();
folder = folder.consume(counter.element());
folder = folder.consume(counter.element());
@@ -102,8 +99,7 @@ fn produces_items_with_no_complete() {
#[should_panic(expected = "too many values")]
fn left_produces_too_many_items() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -120,8 +116,7 @@ fn left_produces_too_many_items() {
#[should_panic(expected = "too many values")]
fn right_produces_too_many_items() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -138,8 +133,7 @@ fn right_produces_too_many_items() {
#[should_panic(expected = "expected 4 total writes, but got 1")]
fn left_produces_fewer_items() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
@@ -158,8 +152,7 @@ fn left_produces_fewer_items() {
#[should_panic(expected = "expected 4 total writes, but got 2")]
fn only_left_result() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -177,8 +170,7 @@ fn only_left_result() {
#[should_panic(expected = "expected 4 total writes, but got 2")]
fn only_right_result() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
let mut right_folder = right_consumer.into_folder();
@@ -195,8 +187,7 @@ fn only_right_result() {
#[should_panic(expected = "expected 4 total writes, but got 2")]
fn reducer_does_not_preserve_order() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
@@ -215,8 +206,7 @@ fn reducer_does_not_preserve_order() {
#[should_panic(expected = "expected 4 total writes, but got 3")]
fn right_produces_fewer_items() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
@@ -230,13 +220,12 @@ fn right_produces_fewer_items() {
}
// The left consumer panics and the right stops short, like `panic_fuse()`.
-// We should get the left panic without finishing `Collect::with_consumer`.
+// We should get the left panic without finishing `collect_with_consumer`.
#[test]
#[should_panic(expected = "left consumer panic")]
fn left_panics() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let (left_result, right_result) = join(
@@ -257,13 +246,12 @@ fn left_panics() {
}
// The right consumer panics and the left stops short, like `panic_fuse()`.
-// We should get the right panic without finishing `Collect::with_consumer`.
+// We should get the right panic without finishing `collect_with_consumer`.
#[test]
#[should_panic(expected = "right consumer panic")]
fn right_panics() {
let mut v = vec![];
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let (left_result, right_result) = join(
@@ -286,12 +274,12 @@ fn right_panics() {
// The left consumer produces fewer items while the right
// consumer produces correct number; check that created elements are dropped
#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
fn left_produces_fewer_items_drops() {
let counter = DropCounter::default();
let mut v = vec![];
let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
- let collect = Collect::new(&mut v, 4);
- collect.with_consumer(|consumer| {
+ collect_with_consumer(&mut v, 4, |consumer| {
let reducer = consumer.to_reducer();
let (left_consumer, right_consumer, _) = consumer.split_at(2);
let mut left_folder = left_consumer.into_folder();
diff --git a/src/iter/extend.rs b/src/iter/extend.rs
index fb89249..1769d47 100644
--- a/src/iter/extend.rs
+++ b/src/iter/extend.rs
@@ -1,4 +1,5 @@
use super::noop::NoopConsumer;
+use super::plumbing::{Consumer, Folder, Reducer, UnindexedConsumer};
use super::{IntoParallelIterator, ParallelExtend, ParallelIterator};
use std::borrow::Cow;
@@ -9,55 +10,91 @@ use std::hash::{BuildHasher, Hash};
/// Performs a generic `par_extend` by collecting to a `LinkedList<Vec<_>>` in
/// parallel, then extending the collection sequentially.
-fn extend<C, I, F>(collection: &mut C, par_iter: I, reserve: F)
-where
- I: IntoParallelIterator,
- F: FnOnce(&mut C, &LinkedList<Vec<I::Item>>),
- C: Extend<I::Item>,
-{
- let list = collect(par_iter);
- reserve(collection, &list);
- for vec in list {
- collection.extend(vec);
- }
+macro_rules! extend {
+ ($self:ident, $par_iter:ident, $extend:ident) => {
+ $extend(
+ $self,
+ $par_iter.into_par_iter().drive_unindexed(ListVecConsumer),
+ );
+ };
}
-pub(super) fn collect<I>(par_iter: I) -> LinkedList<Vec<I::Item>>
-where
- I: IntoParallelIterator,
-{
- par_iter
- .into_par_iter()
- .fold(Vec::new, vec_push)
- .map(as_list)
- .reduce(LinkedList::new, list_append)
+/// Computes the total length of a `LinkedList<Vec<_>>`.
+fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
+ list.iter().map(Vec::len).sum()
}
-fn vec_push<T>(mut vec: Vec<T>, elem: T) -> Vec<T> {
- vec.push(elem);
- vec
-}
+struct ListVecConsumer;
-fn as_list<T>(item: T) -> LinkedList<T> {
- let mut list = LinkedList::new();
- list.push_back(item);
- list
+struct ListVecFolder<T> {
+ vec: Vec<T>,
}
-fn list_append<T>(mut list1: LinkedList<T>, mut list2: LinkedList<T>) -> LinkedList<T> {
- list1.append(&mut list2);
- list1
+impl<T: Send> Consumer<T> for ListVecConsumer {
+ type Folder = ListVecFolder<T>;
+ type Reducer = ListReducer;
+ type Result = LinkedList<Vec<T>>;
+
+ fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+ (Self, Self, ListReducer)
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ ListVecFolder { vec: Vec::new() }
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
}
-/// Computes the total length of a `LinkedList<Vec<_>>`.
-pub(super) fn len<T>(list: &LinkedList<Vec<T>>) -> usize {
- list.iter().map(Vec::len).sum()
+impl<T: Send> UnindexedConsumer<T> for ListVecConsumer {
+ fn split_off_left(&self) -> Self {
+ Self
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ ListReducer
+ }
}
-fn no_reserve<C, T>(_: &mut C, _: &LinkedList<Vec<T>>) {}
+impl<T> Folder<T> for ListVecFolder<T> {
+ type Result = LinkedList<Vec<T>>;
+
+ fn consume(mut self, item: T) -> Self {
+ self.vec.push(item);
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.vec.extend(iter);
+ self
+ }
-fn heap_reserve<T: Ord, U>(heap: &mut BinaryHeap<T>, list: &LinkedList<Vec<U>>) {
- heap.reserve(len(list));
+ fn complete(self) -> Self::Result {
+ let mut list = LinkedList::new();
+ if !self.vec.is_empty() {
+ list.push_back(self.vec);
+ }
+ list
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
+}
+
+fn heap_extend<T, Item>(heap: &mut BinaryHeap<T>, list: LinkedList<Vec<Item>>)
+where
+ BinaryHeap<T>: Extend<Item>,
+{
+ heap.reserve(len(&list));
+ for vec in list {
+ heap.extend(vec);
+ }
}
/// Extends a binary heap with items from a parallel iterator.
@@ -69,7 +106,7 @@ where
where
I: IntoParallelIterator<Item = T>,
{
- extend(self, par_iter, heap_reserve);
+ extend!(self, par_iter, heap_extend);
}
}
@@ -82,7 +119,16 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- extend(self, par_iter, heap_reserve);
+ extend!(self, par_iter, heap_extend);
+ }
+}
+
+fn btree_map_extend<K, V, Item>(map: &mut BTreeMap<K, V>, list: LinkedList<Vec<Item>>)
+where
+ BTreeMap<K, V>: Extend<Item>,
+{
+ for vec in list {
+ map.extend(vec);
}
}
@@ -96,7 +142,7 @@ where
where
I: IntoParallelIterator<Item = (K, V)>,
{
- extend(self, par_iter, no_reserve);
+ extend!(self, par_iter, btree_map_extend);
}
}
@@ -110,7 +156,16 @@ where
where
I: IntoParallelIterator<Item = (&'a K, &'a V)>,
{
- extend(self, par_iter, no_reserve);
+ extend!(self, par_iter, btree_map_extend);
+ }
+}
+
+fn btree_set_extend<T, Item>(set: &mut BTreeSet<T>, list: LinkedList<Vec<Item>>)
+where
+ BTreeSet<T>: Extend<Item>,
+{
+ for vec in list {
+ set.extend(vec);
}
}
@@ -123,7 +178,7 @@ where
where
I: IntoParallelIterator<Item = T>,
{
- extend(self, par_iter, no_reserve);
+ extend!(self, par_iter, btree_set_extend);
}
}
@@ -136,16 +191,20 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- extend(self, par_iter, no_reserve);
+ extend!(self, par_iter, btree_set_extend);
}
}
-fn map_reserve<K, V, S, U>(map: &mut HashMap<K, V, S>, list: &LinkedList<Vec<U>>)
+fn hash_map_extend<K, V, S, Item>(map: &mut HashMap<K, V, S>, list: LinkedList<Vec<Item>>)
where
+ HashMap<K, V, S>: Extend<Item>,
K: Eq + Hash,
S: BuildHasher,
{
- map.reserve(len(list));
+ map.reserve(len(&list));
+ for vec in list {
+ map.extend(vec);
+ }
}
/// Extends a hash map with items from a parallel iterator.
@@ -160,7 +219,7 @@ where
I: IntoParallelIterator<Item = (K, V)>,
{
// See the map_collect benchmarks in rayon-demo for different strategies.
- extend(self, par_iter, map_reserve);
+ extend!(self, par_iter, hash_map_extend);
}
}
@@ -175,16 +234,20 @@ where
where
I: IntoParallelIterator<Item = (&'a K, &'a V)>,
{
- extend(self, par_iter, map_reserve);
+ extend!(self, par_iter, hash_map_extend);
}
}
-fn set_reserve<T, S, U>(set: &mut HashSet<T, S>, list: &LinkedList<Vec<U>>)
+fn hash_set_extend<T, S, Item>(set: &mut HashSet<T, S>, list: LinkedList<Vec<Item>>)
where
+ HashSet<T, S>: Extend<Item>,
T: Eq + Hash,
S: BuildHasher,
{
- set.reserve(len(list));
+ set.reserve(len(&list));
+ for vec in list {
+ set.extend(vec);
+ }
}
/// Extends a hash set with items from a parallel iterator.
@@ -197,7 +260,7 @@ where
where
I: IntoParallelIterator<Item = T>,
{
- extend(self, par_iter, set_reserve);
+ extend!(self, par_iter, hash_set_extend);
}
}
@@ -211,15 +274,10 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- extend(self, par_iter, set_reserve);
+ extend!(self, par_iter, hash_set_extend);
}
}
-fn list_push_back<T>(mut list: LinkedList<T>, elem: T) -> LinkedList<T> {
- list.push_back(elem);
- list
-}
-
/// Extends a linked list with items from a parallel iterator.
impl<T> ParallelExtend<T> for LinkedList<T>
where
@@ -229,10 +287,7 @@ where
where
I: IntoParallelIterator<Item = T>,
{
- let mut list = par_iter
- .into_par_iter()
- .fold(LinkedList::new, list_push_back)
- .reduce(LinkedList::new, list_append);
+ let mut list = par_iter.into_par_iter().drive_unindexed(ListConsumer);
self.append(&mut list);
}
}
@@ -246,13 +301,83 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- self.par_extend(par_iter.into_par_iter().cloned())
+ self.par_extend(par_iter.into_par_iter().copied())
+ }
+}
+
+struct ListConsumer;
+
+struct ListFolder<T> {
+ list: LinkedList<T>,
+}
+
+struct ListReducer;
+
+impl<T: Send> Consumer<T> for ListConsumer {
+ type Folder = ListFolder<T>;
+ type Reducer = ListReducer;
+ type Result = LinkedList<T>;
+
+ fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+ (Self, Self, ListReducer)
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ ListFolder {
+ list: LinkedList::new(),
+ }
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
+}
+
+impl<T: Send> UnindexedConsumer<T> for ListConsumer {
+ fn split_off_left(&self) -> Self {
+ Self
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ ListReducer
+ }
+}
+
+impl<T> Folder<T> for ListFolder<T> {
+ type Result = LinkedList<T>;
+
+ fn consume(mut self, item: T) -> Self {
+ self.list.push_back(item);
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.list.extend(iter);
+ self
+ }
+
+ fn complete(self) -> Self::Result {
+ self.list
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
+}
+
+impl<T> Reducer<LinkedList<T>> for ListReducer {
+ fn reduce(self, mut left: LinkedList<T>, mut right: LinkedList<T>) -> LinkedList<T> {
+ left.append(&mut right);
+ left
}
}
-fn string_push(mut string: String, ch: char) -> String {
- string.push(ch);
- string
+fn flat_string_extend(string: &mut String, list: LinkedList<String>) {
+ string.reserve(list.iter().map(String::len).sum());
+ string.extend(list);
}
/// Extends a string with characters from a parallel iterator.
@@ -263,14 +388,8 @@ impl ParallelExtend<char> for String {
{
// This is like `extend`, but `Vec<char>` is less efficient to deal
// with than `String`, so instead collect to `LinkedList<String>`.
- let list: LinkedList<_> = par_iter
- .into_par_iter()
- .fold(String::new, string_push)
- .map(as_list)
- .reduce(LinkedList::new, list_append);
-
- self.reserve(list.iter().map(String::len).sum());
- self.extend(list)
+ let list = par_iter.into_par_iter().drive_unindexed(ListStringConsumer);
+ flat_string_extend(self, list);
}
}
@@ -280,13 +399,85 @@ impl<'a> ParallelExtend<&'a char> for String {
where
I: IntoParallelIterator<Item = &'a char>,
{
- self.par_extend(par_iter.into_par_iter().cloned())
+ self.par_extend(par_iter.into_par_iter().copied())
}
}
-fn string_reserve<T: AsRef<str>>(string: &mut String, list: &LinkedList<Vec<T>>) {
- let len = list.iter().flatten().map(T::as_ref).map(str::len).sum();
+struct ListStringConsumer;
+
+struct ListStringFolder {
+ string: String,
+}
+
+impl Consumer<char> for ListStringConsumer {
+ type Folder = ListStringFolder;
+ type Reducer = ListReducer;
+ type Result = LinkedList<String>;
+
+ fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) {
+ (Self, Self, ListReducer)
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ ListStringFolder {
+ string: String::new(),
+ }
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
+}
+
+impl UnindexedConsumer<char> for ListStringConsumer {
+ fn split_off_left(&self) -> Self {
+ Self
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ ListReducer
+ }
+}
+
+impl Folder<char> for ListStringFolder {
+ type Result = LinkedList<String>;
+
+ fn consume(mut self, item: char) -> Self {
+ self.string.push(item);
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = char>,
+ {
+ self.string.extend(iter);
+ self
+ }
+
+ fn complete(self) -> Self::Result {
+ let mut list = LinkedList::new();
+ if !self.string.is_empty() {
+ list.push_back(self.string);
+ }
+ list
+ }
+
+ fn full(&self) -> bool {
+ false
+ }
+}
+
+fn string_extend<Item>(string: &mut String, list: LinkedList<Vec<Item>>)
+where
+ String: Extend<Item>,
+ Item: AsRef<str>,
+{
+ let len = list.iter().flatten().map(Item::as_ref).map(str::len).sum();
string.reserve(len);
+ for vec in list {
+ string.extend(vec);
+ }
}
/// Extends a string with string slices from a parallel iterator.
@@ -295,7 +486,7 @@ impl<'a> ParallelExtend<&'a str> for String {
where
I: IntoParallelIterator<Item = &'a str>,
{
- extend(self, par_iter, string_reserve);
+ extend!(self, par_iter, string_extend);
}
}
@@ -305,7 +496,7 @@ impl ParallelExtend<String> for String {
where
I: IntoParallelIterator<Item = String>,
{
- extend(self, par_iter, string_reserve);
+ extend!(self, par_iter, string_extend);
}
}
@@ -315,12 +506,18 @@ impl<'a> ParallelExtend<Cow<'a, str>> for String {
where
I: IntoParallelIterator<Item = Cow<'a, str>>,
{
- extend(self, par_iter, string_reserve);
+ extend!(self, par_iter, string_extend);
}
}
-fn deque_reserve<T, U>(deque: &mut VecDeque<T>, list: &LinkedList<Vec<U>>) {
- deque.reserve(len(list));
+fn deque_extend<T, Item>(deque: &mut VecDeque<T>, list: LinkedList<Vec<Item>>)
+where
+ VecDeque<T>: Extend<Item>,
+{
+ deque.reserve(len(&list));
+ for vec in list {
+ deque.extend(vec);
+ }
}
/// Extends a deque with items from a parallel iterator.
@@ -332,7 +529,7 @@ where
where
I: IntoParallelIterator<Item = T>,
{
- extend(self, par_iter, deque_reserve);
+ extend!(self, par_iter, deque_extend);
}
}
@@ -345,12 +542,43 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- extend(self, par_iter, deque_reserve);
+ extend!(self, par_iter, deque_extend);
}
}
-// See the `collect` module for the `Vec<T>` implementation.
-// impl<T> ParallelExtend<T> for Vec<T>
+fn vec_append<T>(vec: &mut Vec<T>, list: LinkedList<Vec<T>>) {
+ vec.reserve(len(&list));
+ for mut other in list {
+ vec.append(&mut other);
+ }
+}
+
+/// Extends a vector with items from a parallel iterator.
+impl<T> ParallelExtend<T> for Vec<T>
+where
+ T: Send,
+{
+ fn par_extend<I>(&mut self, par_iter: I)
+ where
+ I: IntoParallelIterator<Item = T>,
+ {
+ // See the vec_collect benchmarks in rayon-demo for different strategies.
+ let par_iter = par_iter.into_par_iter();
+ match par_iter.opt_len() {
+ Some(len) => {
+ // When Rust gets specialization, we can get here for indexed iterators
+ // without relying on `opt_len`. Until then, `special_extend()` fakes
+ // an unindexed mode on the promise that `opt_len()` is accurate.
+ super::collect::special_extend(par_iter, len, self);
+ }
+ None => {
+ // This works like `extend`, but `Vec::append` is more efficient.
+ let list = par_iter.drive_unindexed(ListVecConsumer);
+ vec_append(self, list);
+ }
+ }
+ }
+}
/// Extends a vector with copied items from a parallel iterator.
impl<'a, T> ParallelExtend<&'a T> for Vec<T>
@@ -361,7 +589,7 @@ where
where
I: IntoParallelIterator<Item = &'a T>,
{
- self.par_extend(par_iter.into_par_iter().cloned())
+ self.par_extend(par_iter.into_par_iter().copied())
}
}
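The rewritten `ParallelExtend` impls above are all reached through `par_extend`. A minimal sketch of the paths involved, assuming only rayon's prelude (an illustrative editor sketch, not part of the patch):

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;
use std::collections::HashMap;

fn main() {
    // Indexed input: Vec<T> takes the `special_extend` path via `opt_len`.
    let mut v = vec![0, 1];
    v.par_extend((2..6).into_par_iter());
    assert_eq!(v, [0, 1, 2, 3, 4, 5]);

    // Unindexed input (`filter` hides the length): per-thread Vecs are
    // gathered into a LinkedList<Vec<T>> and appended in order.
    let mut odds = Vec::new();
    odds.par_extend((0..10).into_par_iter().filter(|x| x % 2 == 1));
    assert_eq!(odds, [1, 3, 5, 7, 9]);

    // HashMap reserves the exact total length once before extending.
    let mut map = HashMap::new();
    map.par_extend((0..4).into_par_iter().map(|i| (i, i * i)));
    assert_eq!(map[&3], 9);
}
```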
diff --git a/src/iter/filter.rs b/src/iter/filter.rs
index 38627f7..e1b74ba 100644
--- a/src/iter/filter.rs
+++ b/src/iter/filter.rs
@@ -97,7 +97,7 @@ where
P: Fn(&T) -> bool + Sync,
{
fn split_off_left(&self) -> Self {
- FilterConsumer::new(self.base.split_off_left(), &self.filter_op)
+ FilterConsumer::new(self.base.split_off_left(), self.filter_op)
}
fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/filter_map.rs b/src/iter/filter_map.rs
index f19c385..db1c7e3 100644
--- a/src/iter/filter_map.rs
+++ b/src/iter/filter_map.rs
@@ -98,7 +98,7 @@ where
P: Fn(T) -> Option<U> + Sync + 'p,
{
fn split_off_left(&self) -> Self {
- FilterMapConsumer::new(self.base.split_off_left(), &self.filter_op)
+ FilterMapConsumer::new(self.base.split_off_left(), self.filter_op)
}
fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/find.rs b/src/iter/find.rs
index 971db2b..b16ee84 100644
--- a/src/iter/find.rs
+++ b/src/iter/find.rs
@@ -94,7 +94,7 @@ where
self.item = iter
.into_iter()
// stop iterating if another thread has found something
- .take_while(not_full(&self.found))
+ .take_while(not_full(self.found))
.find(self.find_op);
if self.item.is_some() {
self.found.store(true, Ordering::Relaxed)
diff --git a/src/iter/fold_chunks.rs b/src/iter/fold_chunks.rs
new file mode 100644
index 0000000..185fb1a
--- /dev/null
+++ b/src/iter/fold_chunks.rs
@@ -0,0 +1,236 @@
+use std::fmt::{self, Debug};
+
+use super::chunks::ChunkProducer;
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `FoldChunks` is an iterator that groups elements of an underlying iterator and applies a
+/// function over them, producing a single value for each group.
+///
+/// This struct is created by the [`fold_chunks()`] method on [`IndexedParallelIterator`]
+///
+/// [`fold_chunks()`]: trait.IndexedParallelIterator.html#method.fold_chunks
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldChunks<I, ID, F>
+where
+ I: IndexedParallelIterator,
+{
+ base: I,
+ chunk_size: usize,
+ fold_op: F,
+ identity: ID,
+}
+
+impl<I: IndexedParallelIterator + Debug, ID, F> Debug for FoldChunks<I, ID, F> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FoldChunks")
+ .field("base", &self.base)
+ .field("chunk_size", &self.chunk_size)
+ .finish()
+ }
+}
+
+impl<I, ID, U, F> FoldChunks<I, ID, F>
+where
+ I: IndexedParallelIterator,
+ ID: Fn() -> U + Send + Sync,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+ U: Send,
+{
+ /// Creates a new `FoldChunks` iterator
+ pub(super) fn new(base: I, chunk_size: usize, identity: ID, fold_op: F) -> Self {
+ FoldChunks {
+ base,
+ chunk_size,
+ identity,
+ fold_op,
+ }
+ }
+}
+
+impl<I, ID, U, F> ParallelIterator for FoldChunks<I, ID, F>
+where
+ I: IndexedParallelIterator,
+ ID: Fn() -> U + Send + Sync,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+ U: Send,
+{
+ type Item = U;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<U>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<I, ID, U, F> IndexedParallelIterator for FoldChunks<I, ID, F>
+where
+ I: IndexedParallelIterator,
+ ID: Fn() -> U + Send + Sync,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+ U: Send,
+{
+ fn len(&self) -> usize {
+ div_round_up(self.base.len(), self.chunk_size)
+ }
+
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ let len = self.base.len();
+ return self.base.with_producer(Callback {
+ chunk_size: self.chunk_size,
+ len,
+ identity: self.identity,
+ fold_op: self.fold_op,
+ callback,
+ });
+
+ struct Callback<CB, ID, F> {
+ chunk_size: usize,
+ len: usize,
+ identity: ID,
+ fold_op: F,
+ callback: CB,
+ }
+
+ impl<T, CB, ID, U, F> ProducerCallback<T> for Callback<CB, ID, F>
+ where
+ CB: ProducerCallback<U>,
+ ID: Fn() -> U + Send + Sync,
+ F: Fn(U, T) -> U + Send + Sync,
+ {
+ type Output = CB::Output;
+
+ fn callback<P>(self, base: P) -> CB::Output
+ where
+ P: Producer<Item = T>,
+ {
+ let identity = &self.identity;
+ let fold_op = &self.fold_op;
+ let fold_iter = move |iter: P::IntoIter| iter.fold(identity(), fold_op);
+ let producer = ChunkProducer::new(self.chunk_size, self.len, base, fold_iter);
+ self.callback.callback(producer)
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::ops::Add;
+
+ #[test]
+ fn check_fold_chunks() {
+ let words = "bishbashbosh!"
+ .chars()
+ .collect::<Vec<_>>()
+ .into_par_iter()
+ .fold_chunks(4, String::new, |mut s, c| {
+ s.push(c);
+ s
+ })
+ .collect::<Vec<_>>();
+
+ assert_eq!(words, vec!["bish", "bash", "bosh", "!"]);
+ }
+
+ // 'closure' values for tests below
+ fn id() -> i32 {
+ 0
+ }
+ fn sum<T, U>(x: T, y: U) -> T
+ where
+ T: Add<U, Output = T>,
+ {
+ x + y
+ }
+
+ #[test]
+ #[should_panic(expected = "chunk_size must not be zero")]
+ fn check_fold_chunks_zero_size() {
+ let _: Vec<i32> = vec![1, 2, 3]
+ .into_par_iter()
+ .fold_chunks(0, id, sum)
+ .collect();
+ }
+
+ #[test]
+ fn check_fold_chunks_even_size() {
+ assert_eq!(
+ vec![1 + 2 + 3, 4 + 5 + 6, 7 + 8 + 9],
+ (1..10)
+ .into_par_iter()
+ .fold_chunks(3, id, sum)
+ .collect::<Vec<i32>>()
+ );
+ }
+
+ #[test]
+ fn check_fold_chunks_empty() {
+ let v: Vec<i32> = vec![];
+ let expected: Vec<i32> = vec![];
+ assert_eq!(
+ expected,
+ v.into_par_iter()
+ .fold_chunks(2, id, sum)
+ .collect::<Vec<i32>>()
+ );
+ }
+
+ #[test]
+ fn check_fold_chunks_len() {
+ assert_eq!(4, (0..8).into_par_iter().fold_chunks(2, id, sum).len());
+ assert_eq!(3, (0..9).into_par_iter().fold_chunks(3, id, sum).len());
+ assert_eq!(3, (0..8).into_par_iter().fold_chunks(3, id, sum).len());
+ assert_eq!(1, (&[1]).par_iter().fold_chunks(3, id, sum).len());
+ assert_eq!(0, (0..0).into_par_iter().fold_chunks(3, id, sum).len());
+ }
+
+ #[test]
+ fn check_fold_chunks_uneven() {
+ let cases: Vec<(Vec<u32>, usize, Vec<u32>)> = vec![
+ ((0..5).collect(), 3, vec![0 + 1 + 2, 3 + 4]),
+ (vec![1], 5, vec![1]),
+ ((0..4).collect(), 3, vec![0 + 1 + 2, 3]),
+ ];
+
+ for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+ let mut res: Vec<u32> = vec![];
+ v.par_iter()
+ .fold_chunks(n, || 0, sum)
+ .collect_into_vec(&mut res);
+ assert_eq!(expected, res, "Case {} failed", i);
+
+ res.truncate(0);
+ v.into_par_iter()
+ .fold_chunks(n, || 0, sum)
+ .rev()
+ .collect_into_vec(&mut res);
+ assert_eq!(
+ expected.into_iter().rev().collect::<Vec<u32>>(),
+ res,
+ "Case {} reversed failed",
+ i
+ );
+ }
+ }
+}
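As a sketch of what this new file provides (assuming a rayon build that includes this patch), `fold_chunks` matches the allocating `chunks` + `map` formulation described in its docs, without the per-chunk `Vec`:

```rust
// Editor's sketch, not part of the patch; assumes rayon >= 1.6 with fold_chunks.
use rayon::prelude::*;

fn main() {
    let nums: Vec<u32> = (1..=10).collect();

    // The new adaptor folds each chunk of 3 with no per-chunk allocation.
    let folded: Vec<u32> = nums
        .par_iter()
        .copied()
        .fold_chunks(3, || 0, |acc, x| acc + x)
        .collect();

    // Equivalent, but allocating, formulation via `chunks`.
    let chunked: Vec<u32> = nums
        .par_iter()
        .copied()
        .chunks(3)
        .map(|chunk| chunk.into_iter().sum())
        .collect();

    assert_eq!(folded, chunked);
    assert_eq!(folded, [6, 15, 24, 10]);
}
```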
diff --git a/src/iter/fold_chunks_with.rs b/src/iter/fold_chunks_with.rs
new file mode 100644
index 0000000..af120ae
--- /dev/null
+++ b/src/iter/fold_chunks_with.rs
@@ -0,0 +1,231 @@
+use std::fmt::{self, Debug};
+
+use super::chunks::ChunkProducer;
+use super::plumbing::*;
+use super::*;
+use crate::math::div_round_up;
+
+/// `FoldChunksWith` is an iterator that groups elements of an underlying iterator and applies a
+/// function over them, producing a single value for each group.
+///
+/// This struct is created by the [`fold_chunks_with()`] method on [`IndexedParallelIterator`]
+///
+/// [`fold_chunks_with()`]: trait.IndexedParallelIterator.html#method.fold_chunks_with
+/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct FoldChunksWith<I, U, F>
+where
+ I: IndexedParallelIterator,
+{
+ base: I,
+ chunk_size: usize,
+ item: U,
+ fold_op: F,
+}
+
+impl<I: IndexedParallelIterator + Debug, U: Debug, F> Debug for FoldChunksWith<I, U, F> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("FoldChunksWith")
+ .field("base", &self.base)
+ .field("chunk_size", &self.chunk_size)
+ .field("item", &self.item)
+ .finish()
+ }
+}
+
+impl<I, U, F> FoldChunksWith<I, U, F>
+where
+ I: IndexedParallelIterator,
+ U: Send + Clone,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+{
+ /// Creates a new `FoldChunksWith` iterator
+ pub(super) fn new(base: I, chunk_size: usize, item: U, fold_op: F) -> Self {
+ FoldChunksWith {
+ base,
+ chunk_size,
+ item,
+ fold_op,
+ }
+ }
+}
+
+impl<I, U, F> ParallelIterator for FoldChunksWith<I, U, F>
+where
+ I: IndexedParallelIterator,
+ U: Send + Clone,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+{
+ type Item = U;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<U>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<I, U, F> IndexedParallelIterator for FoldChunksWith<I, U, F>
+where
+ I: IndexedParallelIterator,
+ U: Send + Clone,
+ F: Fn(U, I::Item) -> U + Send + Sync,
+{
+ fn len(&self) -> usize {
+ div_round_up(self.base.len(), self.chunk_size)
+ }
+
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ let len = self.base.len();
+ return self.base.with_producer(Callback {
+ chunk_size: self.chunk_size,
+ len,
+ item: self.item,
+ fold_op: self.fold_op,
+ callback,
+ });
+
+ struct Callback<CB, T, F> {
+ chunk_size: usize,
+ len: usize,
+ item: T,
+ fold_op: F,
+ callback: CB,
+ }
+
+ impl<T, U, F, CB> ProducerCallback<T> for Callback<CB, U, F>
+ where
+ CB: ProducerCallback<U>,
+ U: Send + Clone,
+ F: Fn(U, T) -> U + Send + Sync,
+ {
+ type Output = CB::Output;
+
+ fn callback<P>(self, base: P) -> CB::Output
+ where
+ P: Producer<Item = T>,
+ {
+ let item = self.item;
+ let fold_op = &self.fold_op;
+ let fold_iter = move |iter: P::IntoIter| iter.fold(item.clone(), fold_op);
+ let producer = ChunkProducer::new(self.chunk_size, self.len, base, fold_iter);
+ self.callback.callback(producer)
+ }
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::ops::Add;
+
+ #[test]
+ fn check_fold_chunks_with() {
+ let words = "bishbashbosh!"
+ .chars()
+ .collect::<Vec<_>>()
+ .into_par_iter()
+ .fold_chunks_with(4, String::new(), |mut s, c| {
+ s.push(c);
+ s
+ })
+ .collect::<Vec<_>>();
+
+ assert_eq!(words, vec!["bish", "bash", "bosh", "!"]);
+ }
+
+ // 'closure' value for tests below
+ fn sum<T, U>(x: T, y: U) -> T
+ where
+ T: Add<U, Output = T>,
+ {
+ x + y
+ }
+
+ #[test]
+ #[should_panic(expected = "chunk_size must not be zero")]
+ fn check_fold_chunks_zero_size() {
+ let _: Vec<i32> = vec![1, 2, 3]
+ .into_par_iter()
+ .fold_chunks_with(0, 0, sum)
+ .collect();
+ }
+
+ #[test]
+ fn check_fold_chunks_even_size() {
+ assert_eq!(
+ vec![1 + 2 + 3, 4 + 5 + 6, 7 + 8 + 9],
+ (1..10)
+ .into_par_iter()
+ .fold_chunks_with(3, 0, sum)
+ .collect::<Vec<i32>>()
+ );
+ }
+
+ #[test]
+ fn check_fold_chunks_with_empty() {
+ let v: Vec<i32> = vec![];
+ let expected: Vec<i32> = vec![];
+ assert_eq!(
+ expected,
+ v.into_par_iter()
+ .fold_chunks_with(2, 0, sum)
+ .collect::<Vec<i32>>()
+ );
+ }
+
+ #[test]
+ fn check_fold_chunks_len() {
+ assert_eq!(4, (0..8).into_par_iter().fold_chunks_with(2, 0, sum).len());
+ assert_eq!(3, (0..9).into_par_iter().fold_chunks_with(3, 0, sum).len());
+ assert_eq!(3, (0..8).into_par_iter().fold_chunks_with(3, 0, sum).len());
+ assert_eq!(1, (&[1]).par_iter().fold_chunks_with(3, 0, sum).len());
+ assert_eq!(0, (0..0).into_par_iter().fold_chunks_with(3, 0, sum).len());
+ }
+
+ #[test]
+ fn check_fold_chunks_uneven() {
+ let cases: Vec<(Vec<u32>, usize, Vec<u32>)> = vec![
+ ((0..5).collect(), 3, vec![0 + 1 + 2, 3 + 4]),
+ (vec![1], 5, vec![1]),
+ ((0..4).collect(), 3, vec![0 + 1 + 2, 3]),
+ ];
+
+ for (i, (v, n, expected)) in cases.into_iter().enumerate() {
+ let mut res: Vec<u32> = vec![];
+ v.par_iter()
+ .fold_chunks_with(n, 0, sum)
+ .collect_into_vec(&mut res);
+ assert_eq!(expected, res, "Case {} failed", i);
+
+ res.truncate(0);
+ v.into_par_iter()
+ .fold_chunks_with(n, 0, sum)
+ .rev()
+ .collect_into_vec(&mut res);
+ assert_eq!(
+ expected.into_iter().rev().collect::<Vec<u32>>(),
+ res,
+ "Case {} reversed failed",
+ i
+ );
+ }
+ }
+}
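A companion sketch for this variant (same assumptions as above): the initial value is cloned once per chunk, so it only needs `Send + Clone` rather than a `Sync` identity closure:

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;

fn main() {
    let letters: Vec<char> = "bishbashbosh".chars().collect();

    // `String::from(">")` is cloned for each chunk of 4.
    let groups: Vec<String> = letters
        .into_par_iter()
        .fold_chunks_with(4, String::from(">"), |mut s, c| {
            s.push(c);
            s
        })
        .collect();

    assert_eq!(groups, [">bish", ">bash", ">bosh"]);
}
```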
diff --git a/src/iter/inspect.rs b/src/iter/inspect.rs
index 9b1cd09..c50ca02 100644
--- a/src/iter/inspect.rs
+++ b/src/iter/inspect.rs
@@ -209,7 +209,7 @@ where
F: Fn(&T) + Sync,
{
fn split_off_left(&self) -> Self {
- InspectConsumer::new(self.base.split_off_left(), &self.inspect_op)
+ InspectConsumer::new(self.base.split_off_left(), self.inspect_op)
}
fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/interleave.rs b/src/iter/interleave.rs
index b5d43d5..3cacc49 100644
--- a/src/iter/interleave.rs
+++ b/src/iter/interleave.rs
@@ -310,16 +310,16 @@ where
{
#[inline]
fn next_back(&mut self) -> Option<I::Item> {
- if self.i.len() == self.j.len() {
- if self.i_next {
- self.i.next_back()
- } else {
- self.j.next_back()
+ match self.i.len().cmp(&self.j.len()) {
+ Ordering::Less => self.j.next_back(),
+ Ordering::Equal => {
+ if self.i_next {
+ self.i.next_back()
+ } else {
+ self.j.next_back()
+ }
}
- } else if self.i.len() < self.j.len() {
- self.j.next_back()
- } else {
- self.i.next_back()
+ Ordering::Greater => self.i.next_back(),
}
}
}
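For orientation, the adaptor whose reversed stepping is refactored here behaves as follows (a small sketch against rayon's public API):

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;

fn main() {
    let a = [1, 3, 5];
    let b = [2, 4];

    // Alternates between the two inputs, starting with `a`; once one side
    // runs dry, the rest of the longer side is yielded as-is. The
    // `next_back` refactor above serves this iterator's reversed splits.
    let v: Vec<i32> = a.par_iter().interleave(b.par_iter()).copied().collect();
    assert_eq!(v, [1, 2, 3, 4, 5]);
}
```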
diff --git a/src/iter/len.rs b/src/iter/len.rs
index e65b3c0..8ec7f33 100644
--- a/src/iter/len.rs
+++ b/src/iter/len.rs
@@ -3,9 +3,9 @@ use super::*;
use std::cmp;
/// `MinLen` is an iterator that imposes a minimum length on iterator splits.
-/// This struct is created by the [`min_len()`] method on [`IndexedParallelIterator`]
+/// This struct is created by the [`with_min_len()`] method on [`IndexedParallelIterator`]
///
-/// [`min_len()`]: trait.IndexedParallelIterator.html#method.min_len
+/// [`with_min_len()`]: trait.IndexedParallelIterator.html#method.with_min_len
/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
#[derive(Debug, Clone)]
@@ -137,9 +137,9 @@ where
}
/// `MaxLen` is an iterator that imposes a maximum length on iterator splits.
-/// This struct is created by the [`max_len()`] method on [`IndexedParallelIterator`]
+/// This struct is created by the [`with_max_len()`] method on [`IndexedParallelIterator`]
///
-/// [`max_len()`]: trait.IndexedParallelIterator.html#method.max_len
+/// [`with_max_len()`]: trait.IndexedParallelIterator.html#method.with_max_len
/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
#[derive(Debug, Clone)]
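The two methods whose doc links are corrected above are used like this (editor's sketch, not from the patch):

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;

fn main() {
    // Bound how finely rayon may split this work: each job processes at
    // least 64 and at most 256 items, regardless of thread count.
    let sum: i32 = (0..10_000)
        .into_par_iter()
        .with_min_len(64)
        .with_max_len(256)
        .sum();
    assert_eq!(sum, 9_999 * 10_000 / 2);
}
```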
diff --git a/src/iter/map.rs b/src/iter/map.rs
index f2a35ff..da14d40 100644
--- a/src/iter/map.rs
+++ b/src/iter/map.rs
@@ -213,7 +213,7 @@ where
R: Send,
{
fn split_off_left(&self) -> Self {
- MapConsumer::new(self.base.split_off_left(), &self.map_op)
+ MapConsumer::new(self.base.split_off_left(), self.map_op)
}
fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
index edff1a6..e60ea16 100644
--- a/src/iter/mod.rs
+++ b/src/iter/mod.rs
@@ -119,6 +119,8 @@ mod flat_map_iter;
mod flatten;
mod flatten_iter;
mod fold;
+mod fold_chunks;
+mod fold_chunks_with;
mod for_each;
mod from_par_iter;
mod inspect;
@@ -139,9 +141,14 @@ mod reduce;
mod repeat;
mod rev;
mod skip;
+mod skip_any;
+mod skip_any_while;
mod splitter;
+mod step_by;
mod sum;
mod take;
+mod take_any;
+mod take_any_while;
mod try_fold;
mod try_reduce;
mod try_reduce_with;
@@ -165,6 +172,8 @@ pub use self::{
flatten::Flatten,
flatten_iter::FlattenIter,
fold::{Fold, FoldWith},
+ fold_chunks::FoldChunks,
+ fold_chunks_with::FoldChunksWith,
inspect::Inspect,
interleave::Interleave,
interleave_shortest::InterleaveShortest,
@@ -180,8 +189,13 @@ pub use self::{
repeat::{repeat, repeatn, Repeat, RepeatN},
rev::Rev,
skip::Skip,
+ skip_any::SkipAny,
+ skip_any_while::SkipAnyWhile,
splitter::{split, Split},
+ step_by::StepBy,
take::Take,
+ take_any::TakeAny,
+ take_any_while::TakeAnyWhile,
try_fold::{TryFold, TryFoldWith},
update::Update,
while_some::WhileSome,
@@ -189,10 +203,6 @@ pub use self::{
zip_eq::ZipEq,
};
-mod step_by;
-#[cfg(step_by)]
-pub use self::step_by::StepBy;
-
/// `IntoParallelIterator` implements the conversion to a [`ParallelIterator`].
///
/// By implementing `IntoParallelIterator` for a type, you define how it will
@@ -457,10 +467,10 @@ pub trait ParallelIterator: Sized + Send {
fn try_for_each<OP, R>(self, op: OP) -> R
where
OP: Fn(Self::Item) -> R + Sync + Send,
- R: Try<Ok = ()> + Send,
+ R: Try<Output = ()> + Send,
{
- fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
- R::from_ok(())
+ fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+ R::from_output(())
}
self.map(op).try_reduce(<()>::default, ok)
@@ -497,10 +507,10 @@ pub trait ParallelIterator: Sized + Send {
where
OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
T: Send + Clone,
- R: Try<Ok = ()> + Send,
+ R: Try<Output = ()> + Send,
{
- fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
- R::from_ok(())
+ fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+ R::from_output(())
}
self.map_with(init, op).try_reduce(<()>::default, ok)
@@ -539,10 +549,10 @@ pub trait ParallelIterator: Sized + Send {
where
OP: Fn(&mut T, Self::Item) -> R + Sync + Send,
INIT: Fn() -> T + Sync + Send,
- R: Try<Ok = ()> + Send,
+ R: Try<Output = ()> + Send,
{
- fn ok<R: Try<Ok = ()>>(_: (), _: ()) -> R {
- R::from_ok(())
+ fn ok<R: Try<Output = ()>>(_: (), _: ()) -> R {
+ R::from_output(())
}
self.map_init(init, op).try_reduce(<()>::default, ok)
@@ -921,7 +931,7 @@ pub trait ParallelIterator: Sized + Send {
/// An adaptor that flattens serial-iterable `Item`s into one large iterator.
///
- /// See also [`flatten`](#method.flatten) and the analagous comparison of
+ /// See also [`flatten`](#method.flatten) and the analogous comparison of
/// [`flat_map_iter` versus `flat_map`](#flat_map_iter-versus-flat_map).
///
/// # Examples
@@ -1065,7 +1075,7 @@ pub trait ParallelIterator: Sized + Send {
where
OP: Fn(T, T) -> Self::Item + Sync + Send,
ID: Fn() -> T + Sync + Send,
- Self::Item: Try<Ok = T>,
+ Self::Item: Try<Output = T>,
{
try_reduce::try_reduce(self, identity, op)
}
@@ -1108,7 +1118,7 @@ pub trait ParallelIterator: Sized + Send {
fn try_reduce_with<T, OP>(self, op: OP) -> Option<Self::Item>
where
OP: Fn(T, T) -> Self::Item + Sync + Send,
- Self::Item: Try<Ok = T>,
+ Self::Item: Try<Output = T>,
{
try_reduce_with::try_reduce_with(self, op)
}
@@ -1124,7 +1134,7 @@ pub trait ParallelIterator: Sized + Send {
/// multiple sums. The number of results is nondeterministic, as
/// is the point where the breaks occur.
///
- /// So if did the same parallel fold (`fold(0, |a,b| a+b)`) on
+ /// So if we did the same parallel fold (`fold(0, |a,b| a+b)`) on
/// our example list, we might wind up with a sequence of two numbers,
/// like so:
///
@@ -1311,7 +1321,7 @@ pub trait ParallelIterator: Sized + Send {
where
F: Fn(T, Self::Item) -> R + Sync + Send,
ID: Fn() -> T + Sync + Send,
- R: Try<Ok = T> + Send,
+ R: Try<Output = T> + Send,
{
TryFold::new(self, identity, fold_op)
}
@@ -1337,7 +1347,7 @@ pub trait ParallelIterator: Sized + Send {
fn try_fold_with<F, T, R>(self, init: T, fold_op: F) -> TryFoldWith<Self, R, F>
where
F: Fn(T, Self::Item) -> R + Sync + Send,
- R: Try<Ok = T> + Send,
+ R: Try<Output = T> + Send,
T: Clone + Send,
{
TryFoldWith::new(self, init, fold_op)
@@ -2098,7 +2108,7 @@ pub trait ParallelIterator: Sized + Send {
/// Note: unlike the standard `Iterator::partition`, this allows distinct
/// collection types for the left and right items. This is more flexible,
/// but may require new type annotations when converting sequential code
- /// that used type inferrence assuming the two were the same.
+ /// that used type inference assuming the two were the same.
///
/// # Examples
///
@@ -2192,6 +2202,143 @@ pub trait ParallelIterator: Sized + Send {
Intersperse::new(self, element)
}
+    /// Creates an iterator that yields up to `n` elements from *anywhere* in the original iterator.
+ ///
+ /// This is similar to [`IndexedParallelIterator::take`] without being
+ /// constrained to the "first" `n` of the original iterator order. The
+ /// taken items will still maintain their relative order where that is
+ /// visible in `collect`, `reduce`, and similar outputs.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let result: Vec<_> = (0..100)
+ /// .into_par_iter()
+ /// .filter(|&x| x % 2 == 0)
+ /// .take_any(5)
+ /// .collect();
+ ///
+ /// assert_eq!(result.len(), 5);
+ /// assert!(result.windows(2).all(|w| w[0] < w[1]));
+ /// ```
+ fn take_any(self, n: usize) -> TakeAny<Self> {
+ TakeAny::new(self, n)
+ }
+
+    /// Creates an iterator that skips up to `n` elements from *anywhere* in the original iterator.
+ ///
+ /// This is similar to [`IndexedParallelIterator::skip`] without being
+ /// constrained to the "first" `n` of the original iterator order. The
+ /// remaining items will still maintain their relative order where that is
+ /// visible in `collect`, `reduce`, and similar outputs.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let result: Vec<_> = (0..100)
+ /// .into_par_iter()
+ /// .filter(|&x| x % 2 == 0)
+ /// .skip_any(5)
+ /// .collect();
+ ///
+ /// assert_eq!(result.len(), 45);
+ /// assert!(result.windows(2).all(|w| w[0] < w[1]));
+ /// ```
+ fn skip_any(self, n: usize) -> SkipAny<Self> {
+ SkipAny::new(self, n)
+ }
+
+ /// Creates an iterator that takes elements from *anywhere* in the original iterator
+ /// until the given `predicate` returns `false`.
+ ///
+ /// The `predicate` may be anything -- e.g. it could be checking a fact about the item, a
+ /// global condition unrelated to the item itself, or some combination thereof.
+ ///
+ /// If parallel calls to the `predicate` race and give different results, then the
+ /// `true` results will still take those particular items, while respecting the `false`
+ /// result from elsewhere to skip any further items.
+ ///
+ /// This is similar to [`Iterator::take_while`] without being constrained to the original
+ /// iterator order. The taken items will still maintain their relative order where that is
+ /// visible in `collect`, `reduce`, and similar outputs.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let result: Vec<_> = (0..100)
+ /// .into_par_iter()
+ /// .take_any_while(|x| *x < 50)
+ /// .collect();
+ ///
+ /// assert!(result.len() <= 50);
+ /// assert!(result.windows(2).all(|w| w[0] < w[1]));
+ /// ```
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// use std::sync::atomic::AtomicUsize;
+ /// use std::sync::atomic::Ordering::Relaxed;
+ ///
+ /// // Collect any group of items that sum <= 1000
+ /// let quota = AtomicUsize::new(1000);
+ /// let result: Vec<_> = (0_usize..100)
+ /// .into_par_iter()
+ /// .take_any_while(|&x| {
+ /// quota.fetch_update(Relaxed, Relaxed, |q| q.checked_sub(x))
+ /// .is_ok()
+ /// })
+ /// .collect();
+ ///
+ /// let sum = result.iter().sum::<usize>();
+ /// assert!(matches!(sum, 902..=1000));
+ /// ```
+ fn take_any_while<P>(self, predicate: P) -> TakeAnyWhile<Self, P>
+ where
+ P: Fn(&Self::Item) -> bool + Sync + Send,
+ {
+ TakeAnyWhile::new(self, predicate)
+ }
+
+ /// Creates an iterator that skips elements from *anywhere* in the original iterator
+ /// until the given `predicate` returns `false`.
+ ///
+ /// The `predicate` may be anything -- e.g. it could be checking a fact about the item, a
+ /// global condition unrelated to the item itself, or some combination thereof.
+ ///
+ /// If parallel calls to the `predicate` race and give different results, then the
+ /// `true` results will still skip those particular items, while respecting the `false`
+    /// result from elsewhere to keep any further items.
+ ///
+ /// This is similar to [`Iterator::skip_while`] without being constrained to the original
+ /// iterator order. The remaining items will still maintain their relative order where that is
+ /// visible in `collect`, `reduce`, and similar outputs.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let result: Vec<_> = (0..100)
+ /// .into_par_iter()
+ /// .skip_any_while(|x| *x < 50)
+ /// .collect();
+ ///
+ /// assert!(result.len() >= 50);
+ /// assert!(result.windows(2).all(|w| w[0] < w[1]));
+ /// ```
+ fn skip_any_while<P>(self, predicate: P) -> SkipAnyWhile<Self, P>
+ where
+ P: Fn(&Self::Item) -> bool + Sync + Send,
+ {
+ SkipAnyWhile::new(self, predicate)
+ }
+
/// Internal method used to define the behavior of this parallel
/// iterator. You should not need to call this directly.
///
@@ -2241,6 +2388,8 @@ impl<T: ParallelIterator> IntoParallelIterator for T {
/// those points.
///
/// **Note:** Not implemented for `u64`, `i64`, `u128`, or `i128` ranges
+// Waiting for `ExactSizeIterator::is_empty` to be stabilized. See rust-lang/rust#35428
+#[allow(clippy::len_without_is_empty)]
pub trait IndexedParallelIterator: ParallelIterator {
/// Collects the results of the iterator into the specified
/// vector. The vector is always truncated before execution
@@ -2339,13 +2488,18 @@ pub trait IndexedParallelIterator: ParallelIterator {
/// // we should never get here
/// assert_eq!(1, zipped.len());
/// ```
+ #[track_caller]
fn zip_eq<Z>(self, zip_op: Z) -> ZipEq<Self, Z::Iter>
where
Z: IntoParallelIterator,
Z::Iter: IndexedParallelIterator,
{
let zip_op_iter = zip_op.into_par_iter();
- assert_eq!(self.len(), zip_op_iter.len());
+ assert_eq!(
+ self.len(),
+ zip_op_iter.len(),
+ "iterators must have the same length"
+ );
ZipEq::new(self, zip_op_iter)
}
@@ -2410,11 +2564,95 @@ pub trait IndexedParallelIterator: ParallelIterator {
/// let r: Vec<Vec<i32>> = a.into_par_iter().chunks(3).collect();
/// assert_eq!(r, vec![vec![1,2,3], vec![4,5,6], vec![7,8,9], vec![10]]);
/// ```
+ #[track_caller]
fn chunks(self, chunk_size: usize) -> Chunks<Self> {
assert!(chunk_size != 0, "chunk_size must not be zero");
Chunks::new(self, chunk_size)
}
+ /// Splits an iterator into fixed-size chunks, performing a sequential [`fold()`] on
+ /// each chunk.
+ ///
+ /// Returns an iterator that produces a folded result for each chunk of items
+ /// produced by this iterator.
+ ///
+ /// This works essentially like:
+ ///
+ /// ```text
+ /// iter.chunks(chunk_size)
+ /// .map(|chunk|
+ /// chunk.into_iter()
+ /// .fold(identity, fold_op)
+ /// )
+ /// ```
+ ///
+ /// except there is no per-chunk allocation overhead.
+ ///
+ /// [`fold()`]: std::iter::Iterator#method.fold
+ ///
+ /// **Panics** if `chunk_size` is 0.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let nums = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ /// let chunk_sums = nums.into_par_iter().fold_chunks(2, || 0, |a, n| a + n).collect::<Vec<_>>();
+ /// assert_eq!(chunk_sums, vec![3, 7, 11, 15, 19]);
+ /// ```
+ #[track_caller]
+ fn fold_chunks<T, ID, F>(
+ self,
+ chunk_size: usize,
+ identity: ID,
+ fold_op: F,
+ ) -> FoldChunks<Self, ID, F>
+ where
+ ID: Fn() -> T + Send + Sync,
+ F: Fn(T, Self::Item) -> T + Send + Sync,
+ T: Send,
+ {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ FoldChunks::new(self, chunk_size, identity, fold_op)
+ }
+
+ /// Splits an iterator into fixed-size chunks, performing a sequential [`fold()`] on
+ /// each chunk.
+ ///
+ /// Returns an iterator that produces a folded result for each chunk of items
+ /// produced by this iterator.
+ ///
+ /// This works essentially like `fold_chunks(chunk_size, || init.clone(), fold_op)`,
+ /// except it doesn't require the `init` type to be `Sync`, nor any other form of
+ /// added synchronization.
+ ///
+ /// [`fold()`]: std::iter::Iterator#method.fold
+ ///
+ /// **Panics** if `chunk_size` is 0.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let nums = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+ /// let chunk_sums = nums.into_par_iter().fold_chunks_with(2, 0, |a, n| a + n).collect::<Vec<_>>();
+ /// assert_eq!(chunk_sums, vec![3, 7, 11, 15, 19]);
+ /// ```
+ #[track_caller]
+ fn fold_chunks_with<T, F>(
+ self,
+ chunk_size: usize,
+ init: T,
+ fold_op: F,
+ ) -> FoldChunksWith<Self, T, F>
+ where
+ T: Send + Clone,
+ F: Fn(T, Self::Item) -> T + Send + Sync,
+ {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ FoldChunksWith::new(self, chunk_size, init, fold_op)
+ }
+
/// Lexicographically compares the elements of this `ParallelIterator` with those of
/// another.
///
@@ -2601,11 +2839,6 @@ pub trait IndexedParallelIterator: ParallelIterator {
///
/// assert_eq!(result, [3, 6, 9])
/// ```
- ///
- /// # Compatibility
- ///
- /// This method is only available on Rust 1.38 or greater.
- #[cfg(step_by)]
fn step_by(self, step: usize) -> StepBy<Self> {
StepBy::new(self, step)
}
@@ -2808,7 +3041,7 @@ pub trait IndexedParallelIterator: ParallelIterator {
}
/// Sets the minimum length of iterators desired to process in each
- /// thread. Rayon will not split any smaller than this length, but
+ /// rayon job. Rayon will not split any smaller than this length, but
/// of course an iterator could already be smaller to begin with.
///
/// Producers like `zip` and `interleave` will use greater of the two
@@ -2834,7 +3067,7 @@ pub trait IndexedParallelIterator: ParallelIterator {
}
/// Sets the maximum length of iterators desired to process in each
- /// thread. Rayon will try to split at least below this length,
+ /// rayon job. Rayon will try to split at least below this length,
/// unless that would put it below the length from `with_min_len()`.
/// For example, given min=10 and max=15, a length of 16 will not be
/// split any further.
@@ -3145,50 +3378,154 @@ pub trait ParallelDrainRange<Idx = usize> {
/// We hide the `Try` trait in a private module, as it's only meant to be a
/// stable clone of the standard library's `Try` trait, as yet unstable.
mod private {
+ use std::convert::Infallible;
+ use std::ops::ControlFlow::{self, Break, Continue};
+ use std::task::Poll;
+
/// Clone of `std::ops::Try`.
///
/// Implementing this trait is not permitted outside of `rayon`.
pub trait Try {
private_decl! {}
- type Ok;
- type Error;
- fn into_result(self) -> Result<Self::Ok, Self::Error>;
- fn from_ok(v: Self::Ok) -> Self;
- fn from_error(v: Self::Error) -> Self;
+ type Output;
+ type Residual;
+
+ fn from_output(output: Self::Output) -> Self;
+
+ fn from_residual(residual: Self::Residual) -> Self;
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output>;
+ }
+
+ impl<B, C> Try for ControlFlow<B, C> {
+ private_impl! {}
+
+ type Output = C;
+ type Residual = ControlFlow<B, Infallible>;
+
+ fn from_output(output: Self::Output) -> Self {
+ Continue(output)
+ }
+
+ fn from_residual(residual: Self::Residual) -> Self {
+ match residual {
+ Break(b) => Break(b),
+ Continue(_) => unreachable!(),
+ }
+ }
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+ match self {
+ Continue(c) => Continue(c),
+ Break(b) => Break(Break(b)),
+ }
+ }
}
impl<T> Try for Option<T> {
private_impl! {}
- type Ok = T;
- type Error = ();
+ type Output = T;
+ type Residual = Option<Infallible>;
- fn into_result(self) -> Result<T, ()> {
- self.ok_or(())
+ fn from_output(output: Self::Output) -> Self {
+ Some(output)
}
- fn from_ok(v: T) -> Self {
- Some(v)
+
+ fn from_residual(residual: Self::Residual) -> Self {
+ match residual {
+ None => None,
+ Some(_) => unreachable!(),
+ }
}
- fn from_error(_: ()) -> Self {
- None
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+ match self {
+ Some(c) => Continue(c),
+ None => Break(None),
+ }
}
}
impl<T, E> Try for Result<T, E> {
private_impl! {}
- type Ok = T;
- type Error = E;
+ type Output = T;
+ type Residual = Result<Infallible, E>;
- fn into_result(self) -> Result<T, E> {
- self
+ fn from_output(output: Self::Output) -> Self {
+ Ok(output)
}
- fn from_ok(v: T) -> Self {
- Ok(v)
+
+ fn from_residual(residual: Self::Residual) -> Self {
+ match residual {
+ Err(e) => Err(e),
+ Ok(_) => unreachable!(),
+ }
+ }
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+ match self {
+ Ok(c) => Continue(c),
+ Err(e) => Break(Err(e)),
+ }
}
- fn from_error(v: E) -> Self {
- Err(v)
+ }
+
+ impl<T, E> Try for Poll<Result<T, E>> {
+ private_impl! {}
+
+ type Output = Poll<T>;
+ type Residual = Result<Infallible, E>;
+
+ fn from_output(output: Self::Output) -> Self {
+ output.map(Ok)
+ }
+
+ fn from_residual(residual: Self::Residual) -> Self {
+ match residual {
+ Err(e) => Poll::Ready(Err(e)),
+ Ok(_) => unreachable!(),
+ }
+ }
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+ match self {
+ Poll::Pending => Continue(Poll::Pending),
+ Poll::Ready(Ok(c)) => Continue(Poll::Ready(c)),
+ Poll::Ready(Err(e)) => Break(Err(e)),
+ }
+ }
+ }
+
+ impl<T, E> Try for Poll<Option<Result<T, E>>> {
+ private_impl! {}
+
+ type Output = Poll<Option<T>>;
+ type Residual = Result<Infallible, E>;
+
+ fn from_output(output: Self::Output) -> Self {
+ match output {
+ Poll::Ready(o) => Poll::Ready(o.map(Ok)),
+ Poll::Pending => Poll::Pending,
+ }
+ }
+
+ fn from_residual(residual: Self::Residual) -> Self {
+ match residual {
+ Err(e) => Poll::Ready(Some(Err(e))),
+ Ok(_) => unreachable!(),
+ }
+ }
+
+ fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
+ match self {
+ Poll::Pending => Continue(Poll::Pending),
+ Poll::Ready(None) => Continue(Poll::Ready(None)),
+ Poll::Ready(Some(Ok(c))) => Continue(Poll::Ready(Some(c))),
+ Poll::Ready(Some(Err(e))) => Break(Err(e)),
+ }
}
}
}
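The net effect of this `Try` rework shows up in rayon's public short-circuiting methods, which now accept `std::ops::ControlFlow` alongside `Option` and `Result` (all three impls appear above). A sketch, assuming a rayon release containing this change:

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;
use std::ops::ControlFlow;

fn main() {
    // `branch()` drives the early exit: the first Break wins.
    let r = (0..1_000).into_par_iter().try_for_each(|x| {
        if x == 42 {
            ControlFlow::Break(x)
        } else {
            ControlFlow::Continue(())
        }
    });
    assert_eq!(r, ControlFlow::Break(42));

    // Result keeps working exactly as before via its own impl.
    let ok: Result<(), ()> = (0..10).into_par_iter().try_for_each(|_| Ok(()));
    assert!(ok.is_ok());
}
```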
diff --git a/src/iter/par_bridge.rs b/src/iter/par_bridge.rs
index 339ac1a..8398274 100644
--- a/src/iter/par_bridge.rs
+++ b/src/iter/par_bridge.rs
@@ -1,12 +1,9 @@
-use crossbeam_deque::{Steal, Stealer, Worker};
-
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-use std::sync::{Mutex, TryLockError};
-use std::thread::yield_now;
+use std::sync::Mutex;
-use crate::current_num_threads;
use crate::iter::plumbing::{bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer};
use crate::iter::ParallelIterator;
+use crate::{current_num_threads, current_thread_index};
/// Conversion trait to convert an `Iterator` to a `ParallelIterator`.
///
@@ -78,71 +75,46 @@ where
where
C: UnindexedConsumer<Self::Item>,
{
- let split_count = AtomicUsize::new(current_num_threads());
- let worker = Worker::new_fifo();
- let stealer = worker.stealer();
- let done = AtomicBool::new(false);
- let iter = Mutex::new((self.iter, worker));
+ let num_threads = current_num_threads();
+ let threads_started: Vec<_> = (0..num_threads).map(|_| AtomicBool::new(false)).collect();
bridge_unindexed(
- IterParallelProducer {
- split_count: &split_count,
- done: &done,
- iter: &iter,
- items: stealer,
+ &IterParallelProducer {
+ split_count: AtomicUsize::new(num_threads),
+ iter: Mutex::new(self.iter.fuse()),
+ threads_started: &threads_started,
},
consumer,
)
}
}
-struct IterParallelProducer<'a, Iter: Iterator> {
- split_count: &'a AtomicUsize,
- done: &'a AtomicBool,
- iter: &'a Mutex<(Iter, Worker<Iter::Item>)>,
- items: Stealer<Iter::Item>,
-}
-
-// manual clone because T doesn't need to be Clone, but the derive assumes it should be
-impl<'a, Iter: Iterator + 'a> Clone for IterParallelProducer<'a, Iter> {
- fn clone(&self) -> Self {
- IterParallelProducer {
- split_count: self.split_count,
- done: self.done,
- iter: self.iter,
- items: self.items.clone(),
- }
- }
+struct IterParallelProducer<'a, Iter> {
+ split_count: AtomicUsize,
+ iter: Mutex<std::iter::Fuse<Iter>>,
+ threads_started: &'a [AtomicBool],
}
-impl<'a, Iter: Iterator + Send + 'a> UnindexedProducer for IterParallelProducer<'a, Iter>
-where
- Iter::Item: Send,
-{
+impl<Iter: Iterator + Send> UnindexedProducer for &IterParallelProducer<'_, Iter> {
type Item = Iter::Item;
fn split(self) -> (Self, Option<Self>) {
let mut count = self.split_count.load(Ordering::SeqCst);
loop {
- // Check if the iterator is exhausted *and* we've consumed every item from it.
- let done = self.done.load(Ordering::SeqCst) && self.items.is_empty();
-
- match count.checked_sub(1) {
- Some(new_count) if !done => {
- match self.split_count.compare_exchange_weak(
- count,
- new_count,
- Ordering::SeqCst,
- Ordering::SeqCst,
- ) {
- Ok(_) => return (self.clone(), Some(self)),
- Err(last_count) => count = last_count,
- }
- }
- _ => {
- return (self, None);
+ // Check if the iterator is exhausted
+ if let Some(new_count) = count.checked_sub(1) {
+ match self.split_count.compare_exchange_weak(
+ count,
+ new_count,
+ Ordering::SeqCst,
+ Ordering::SeqCst,
+ ) {
+ Ok(_) => return (self, Some(self)),
+ Err(last_count) => count = last_count,
}
+ } else {
+ return (self, None);
}
}
}
@@ -151,66 +123,39 @@ where
where
F: Folder<Self::Item>,
{
+ // Guard against work-stealing-induced recursion, in case `Iter::next()`
+ // calls rayon internally, so we don't deadlock our mutex. We might also
+ // be recursing via `folder` methods, which doesn't present a mutex hazard,
+ // but it's lower overhead for us to just check this once, rather than
+ // updating additional shared state on every mutex lock/unlock.
+ // (If this isn't a rayon thread, then there's no work-stealing anyway...)
+ if let Some(i) = current_thread_index() {
+ // Note: If the number of threads in the pool ever grows dynamically, then
+ // we'll end up sharing flags and may falsely detect recursion -- that's
+ // still fine for overall correctness, just not optimal for parallelism.
+ let thread_started = &self.threads_started[i % self.threads_started.len()];
+ if thread_started.swap(true, Ordering::Relaxed) {
+ // We can't make progress with a nested mutex, so just return and let
+ // the outermost loop continue with the rest of the iterator items.
+ return folder;
+ }
+ }
+
loop {
- match self.items.steal() {
- Steal::Success(it) => {
+ if let Ok(mut iter) = self.iter.lock() {
+ if let Some(it) = iter.next() {
+ drop(iter);
folder = folder.consume(it);
if folder.full() {
return folder;
}
+ } else {
+ return folder;
}
- Steal::Empty => {
- // Don't storm the mutex if we're already done.
- if self.done.load(Ordering::SeqCst) {
- // Someone might have pushed more between our `steal()` and `done.load()`
- if self.items.is_empty() {
- // The iterator is out of items, no use in continuing
- return folder;
- }
- } else {
- // our cache is out of items, time to load more from the iterator
- match self.iter.try_lock() {
- Ok(mut guard) => {
- // Check `done` again in case we raced with the previous lock
- // holder on its way out.
- if self.done.load(Ordering::SeqCst) {
- if self.items.is_empty() {
- return folder;
- }
- continue;
- }
-
- let count = current_num_threads();
- let count = (count * count) * 2;
-
- let (ref mut iter, ref worker) = *guard;
-
- // while worker.len() < count {
- // FIXME the new deque doesn't let us count items. We can just
- // push a number of items, but that doesn't consider active
- // stealers elsewhere.
- for _ in 0..count {
- if let Some(it) = iter.next() {
- worker.push(it);
- } else {
- self.done.store(true, Ordering::SeqCst);
- break;
- }
- }
- }
- Err(TryLockError::WouldBlock) => {
- // someone else has the mutex, just sit tight until it's ready
- yield_now(); //TODO: use a thread-pool-aware yield? (#548)
- }
- Err(TryLockError::Poisoned(_)) => {
- // any panics from other threads will have been caught by the pool,
- // and will be re-thrown when joined - just exit
- return folder;
- }
- }
- }
- }
- Steal::Retry => (),
+ } else {
+ // any panics from other threads will have been caught by the pool,
+ // and will be re-thrown when joined - just exit
+ return folder;
}
}
}
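The public behavior of `par_bridge` is unchanged by this rewrite; only the internal queueing moved from a crossbeam deque to a shared `Mutex<std::iter::Fuse<Iter>>`. A usage sketch (editor's example):

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;

fn main() {
    // Any Send serial iterator can be bridged; workers now pull items
    // one at a time from the shared, fused iterator behind the mutex.
    let serial = (0..100).filter(|x| x % 3 == 0);
    let par_sum: i32 = serial.par_bridge().sum();

    let seq_sum: i32 = (0..100).filter(|x| x % 3 == 0).sum();
    assert_eq!(par_sum, seq_sum);
}
```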
diff --git a/src/iter/plumbing/README.md b/src/iter/plumbing/README.md
index cd94eae..42d22ef 100644
--- a/src/iter/plumbing/README.md
+++ b/src/iter/plumbing/README.md
@@ -35,8 +35,8 @@ modes (which is why there are two):
more like a `for_each` call: each time a new item is produced, the
`consume` method is called with that item. (The traits themselves are
a bit more complex, as they support state that can be threaded
- through and ultimately reduced.) Unlike producers, there are two
- variants of consumers. The difference is how the split is performed:
+ through and ultimately reduced.) Like producers, there are two
+ variants of consumers which differ in how the split is performed:
- in the `Consumer` trait, splitting is done with `split_at`, which
accepts an index where the split should be performed. All
iterators can work in this mode. The resulting halves thus have an
@@ -124,7 +124,7 @@ implement `IndexedParallelIterator`.
The `bridge` function will then connect the consumer, which is
handling the `flat_map` and `for_each`, with the producer, which is
-handling the `zip` and its preecessors. It will split down until the
+handling the `zip` and its predecessors. It will split down until the
chunks seem reasonably small, then pull items from the producer and
feed them to the consumer.
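A compact restatement of the two consumer splitting modes described above, as simplified trait signatures (a sketch only; rayon's real plumbing traits carry more methods):

```rust
// Editor's sketch: simplified from rayon's plumbing, not a drop-in replacement.
trait Consumer<Item>: Sized {
    type Folder;
    type Reducer;
    type Result;

    // Indexed mode: the caller picks the split point, so both halves
    // know exactly how many items they will receive.
    fn split_at(self, index: usize) -> (Self, Self, Self::Reducer);
}

trait UnindexedConsumer<Item>: Consumer<Item> {
    // Unindexed mode: peel off a consumer for "some left portion"
    // without committing to a length, as used by `bridge_unindexed`.
    fn split_off_left(&self) -> Self;
    fn to_reducer(&self) -> Self::Reducer;
}
```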
diff --git a/src/iter/skip.rs b/src/iter/skip.rs
index 9983f16..2d0f947 100644
--- a/src/iter/skip.rs
+++ b/src/iter/skip.rs
@@ -79,9 +79,16 @@ where
where
P: Producer<Item = T>,
{
- let (before_skip, after_skip) = base.split_at(self.n);
- bridge_producer_consumer(self.n, before_skip, NoopConsumer);
- self.callback.callback(after_skip)
+ crate::in_place_scope(|scope| {
+ let Self { callback, n } = self;
+ let (before_skip, after_skip) = base.split_at(n);
+
+ // Run the skipped part separately for side effects.
+ // We'll still get any panics propagated back by the scope.
+ scope.spawn(move |_| bridge_producer_consumer(n, before_skip, NoopConsumer));
+
+ callback.callback(after_skip)
+ })
}
}
}
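One observable consequence of this hunk: `skip(n)` still drives the skipped prefix into `NoopConsumer` so its side effects run, now concurrently inside `in_place_scope`. A sketch of that behavior (editor's example, not from the patch):

```rust
// Editor's sketch, not part of the patch.
use rayon::prelude::*;
use std::sync::atomic::{AtomicUsize, Ordering};

fn main() {
    let seen = AtomicUsize::new(0);

    let tail: Vec<i32> = (0..10)
        .into_par_iter()
        .inspect(|_| {
            seen.fetch_add(1, Ordering::Relaxed);
        })
        .skip(3)
        .collect();

    assert_eq!(tail, [3, 4, 5, 6, 7, 8, 9]);
    // The skipped prefix was still consumed, so all 10 inspections fire.
    assert_eq!(seen.load(Ordering::Relaxed), 10);
}
```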
diff --git a/src/iter/skip_any.rs b/src/iter/skip_any.rs
new file mode 100644
index 0000000..0660a56
--- /dev/null
+++ b/src/iter/skip_any.rs
@@ -0,0 +1,144 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+/// `SkipAny` is an iterator that skips over `n` elements from anywhere in `I`.
+/// This struct is created by the [`skip_any()`] method on [`ParallelIterator`]
+///
+/// [`skip_any()`]: trait.ParallelIterator.html#method.skip_any
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug)]
+pub struct SkipAny<I: ParallelIterator> {
+ base: I,
+ count: usize,
+}
+
+impl<I> SkipAny<I>
+where
+ I: ParallelIterator,
+{
+ /// Creates a new `SkipAny` iterator.
+ pub(super) fn new(base: I, count: usize) -> Self {
+ SkipAny { base, count }
+ }
+}
+
+impl<I> ParallelIterator for SkipAny<I>
+where
+ I: ParallelIterator,
+{
+ type Item = I::Item;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ let consumer1 = SkipAnyConsumer {
+ base: consumer,
+ count: &AtomicUsize::new(self.count),
+ };
+ self.base.drive_unindexed(consumer1)
+ }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct SkipAnyConsumer<'f, C> {
+ base: C,
+ count: &'f AtomicUsize,
+}
+
+impl<'f, T, C> Consumer<T> for SkipAnyConsumer<'f, C>
+where
+ C: Consumer<T>,
+ T: Send,
+{
+ type Folder = SkipAnyFolder<'f, C::Folder>;
+ type Reducer = C::Reducer;
+ type Result = C::Result;
+
+ fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+ let (left, right, reducer) = self.base.split_at(index);
+ (
+ SkipAnyConsumer { base: left, ..self },
+ SkipAnyConsumer {
+ base: right,
+ ..self
+ },
+ reducer,
+ )
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ SkipAnyFolder {
+ base: self.base.into_folder(),
+ count: self.count,
+ }
+ }
+
+ fn full(&self) -> bool {
+ self.base.full()
+ }
+}
+
+impl<'f, T, C> UnindexedConsumer<T> for SkipAnyConsumer<'f, C>
+where
+ C: UnindexedConsumer<T>,
+ T: Send,
+{
+ fn split_off_left(&self) -> Self {
+ SkipAnyConsumer {
+ base: self.base.split_off_left(),
+ ..*self
+ }
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ self.base.to_reducer()
+ }
+}
+
+struct SkipAnyFolder<'f, C> {
+ base: C,
+ count: &'f AtomicUsize,
+}
+
+fn checked_decrement(u: &AtomicUsize) -> bool {
+ u.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |u| u.checked_sub(1))
+ .is_ok()
+}
+
+impl<'f, T, C> Folder<T> for SkipAnyFolder<'f, C>
+where
+ C: Folder<T>,
+{
+ type Result = C::Result;
+
+ fn consume(mut self, item: T) -> Self {
+ if !checked_decrement(self.count) {
+ self.base = self.base.consume(item);
+ }
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.base = self.base.consume_iter(
+ iter.into_iter()
+ .skip_while(move |_| checked_decrement(self.count)),
+ );
+ self
+ }
+
+ fn complete(self) -> C::Result {
+ self.base.complete()
+ }
+
+ fn full(&self) -> bool {
+ self.base.full()
+ }
+}
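The `checked_decrement` helper above is the entire synchronization story for `skip_any` (and `take_any` below): an atomic counter that refuses to wrap below zero. A standalone toy with the same shape (hypothetical demo, not rayon API):

```rust
// Editor's sketch, not part of the patch.
use std::sync::atomic::{AtomicUsize, Ordering};

fn checked_decrement(u: &AtomicUsize) -> bool {
    // `fetch_update` retries on contention; `checked_sub` makes the
    // update fail (Err) once the counter has already hit zero.
    u.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |u| u.checked_sub(1))
        .is_ok()
}

fn main() {
    let budget = AtomicUsize::new(3);
    let claims: Vec<bool> = (0..5).map(|_| checked_decrement(&budget)).collect();
    // Exactly the first three claims succeed, however the callers race.
    assert_eq!(claims, [true, true, true, false, false]);
}
```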
diff --git a/src/iter/skip_any_while.rs b/src/iter/skip_any_while.rs
new file mode 100644
index 0000000..28b9e59
--- /dev/null
+++ b/src/iter/skip_any_while.rs
@@ -0,0 +1,166 @@
+use super::plumbing::*;
+use super::*;
+use std::fmt;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// `SkipAnyWhile` is an iterator that skips over elements from anywhere in `I`
+/// until the callback returns `false`.
+/// This struct is created by the [`skip_any_while()`] method on [`ParallelIterator`]
+///
+/// [`skip_any_while()`]: trait.ParallelIterator.html#method.skip_any_while
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct SkipAnyWhile<I: ParallelIterator, P> {
+ base: I,
+ predicate: P,
+}
+
+impl<I: ParallelIterator + fmt::Debug, P> fmt::Debug for SkipAnyWhile<I, P> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("SkipAnyWhile")
+ .field("base", &self.base)
+ .finish()
+ }
+}
+
+impl<I, P> SkipAnyWhile<I, P>
+where
+ I: ParallelIterator,
+{
+ /// Creates a new `SkipAnyWhile` iterator.
+ pub(super) fn new(base: I, predicate: P) -> Self {
+ SkipAnyWhile { base, predicate }
+ }
+}
+
+impl<I, P> ParallelIterator for SkipAnyWhile<I, P>
+where
+ I: ParallelIterator,
+ P: Fn(&I::Item) -> bool + Sync + Send,
+{
+ type Item = I::Item;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ let consumer1 = SkipAnyWhileConsumer {
+ base: consumer,
+ predicate: &self.predicate,
+ skipping: &AtomicBool::new(true),
+ };
+ self.base.drive_unindexed(consumer1)
+ }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct SkipAnyWhileConsumer<'p, C, P> {
+ base: C,
+ predicate: &'p P,
+ skipping: &'p AtomicBool,
+}
+
+impl<'p, T, C, P> Consumer<T> for SkipAnyWhileConsumer<'p, C, P>
+where
+ C: Consumer<T>,
+ P: Fn(&T) -> bool + Sync,
+{
+ type Folder = SkipAnyWhileFolder<'p, C::Folder, P>;
+ type Reducer = C::Reducer;
+ type Result = C::Result;
+
+ fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+ let (left, right, reducer) = self.base.split_at(index);
+ (
+ SkipAnyWhileConsumer { base: left, ..self },
+ SkipAnyWhileConsumer {
+ base: right,
+ ..self
+ },
+ reducer,
+ )
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ SkipAnyWhileFolder {
+ base: self.base.into_folder(),
+ predicate: self.predicate,
+ skipping: self.skipping,
+ }
+ }
+
+ fn full(&self) -> bool {
+ self.base.full()
+ }
+}
+
+impl<'p, T, C, P> UnindexedConsumer<T> for SkipAnyWhileConsumer<'p, C, P>
+where
+ C: UnindexedConsumer<T>,
+ P: Fn(&T) -> bool + Sync,
+{
+ fn split_off_left(&self) -> Self {
+ SkipAnyWhileConsumer {
+ base: self.base.split_off_left(),
+ ..*self
+ }
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ self.base.to_reducer()
+ }
+}
+
+struct SkipAnyWhileFolder<'p, C, P> {
+ base: C,
+ predicate: &'p P,
+ skipping: &'p AtomicBool,
+}
+
+fn skip<T>(item: &T, skipping: &AtomicBool, predicate: &impl Fn(&T) -> bool) -> bool {
+ if !skipping.load(Ordering::Relaxed) {
+ return false;
+ }
+ if predicate(item) {
+ return true;
+ }
+ skipping.store(false, Ordering::Relaxed);
+ false
+}
+
+impl<'p, T, C, P> Folder<T> for SkipAnyWhileFolder<'p, C, P>
+where
+ C: Folder<T>,
+ P: Fn(&T) -> bool + 'p,
+{
+ type Result = C::Result;
+
+ fn consume(mut self, item: T) -> Self {
+ if !skip(&item, self.skipping, self.predicate) {
+ self.base = self.base.consume(item);
+ }
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.base = self.base.consume_iter(
+ iter.into_iter()
+ .skip_while(move |x| skip(x, self.skipping, self.predicate)),
+ );
+ self
+ }
+
+ fn complete(self) -> C::Result {
+ self.base.complete()
+ }
+
+ fn full(&self) -> bool {
+ self.base.full()
+ }
+}
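For reference, a minimal sketch of the new adaptor in use (not part of the patch). Because the skipping flag is shared across threads with relaxed ordering, which sub-threshold elements get skipped is nondeterministic; only elements that fail the predicate are guaranteed to be kept:

    use rayon::prelude::*;

    fn main() {
        // Elements that fail the predicate are never skipped, so all 50
        // values >= 50 appear; some values < 50 may also slip through
        // after the shared flag flips.
        let kept: Vec<i32> = (0..100)
            .into_par_iter()
            .skip_any_while(|&x| x < 50)
            .collect();
        assert_eq!(kept.iter().filter(|&&x| x >= 50).count(), 50);
        assert!(kept.len() >= 50);
    }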
diff --git a/src/iter/step_by.rs b/src/iter/step_by.rs
index 2002f42..94b8334 100644
--- a/src/iter/step_by.rs
+++ b/src/iter/step_by.rs
@@ -1,4 +1,3 @@
-#![cfg(step_by)]
use std::cmp::min;
use super::plumbing::*;
diff --git a/src/iter/take_any.rs b/src/iter/take_any.rs
new file mode 100644
index 0000000..e3992b3
--- /dev/null
+++ b/src/iter/take_any.rs
@@ -0,0 +1,144 @@
+use super::plumbing::*;
+use super::*;
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+/// `TakeAny` is an iterator that iterates over `n` elements from anywhere in `I`.
+/// This struct is created by the [`take_any()`] method on [`ParallelIterator`].
+///
+/// [`take_any()`]: trait.ParallelIterator.html#method.take_any
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone, Debug)]
+pub struct TakeAny<I: ParallelIterator> {
+ base: I,
+ count: usize,
+}
+
+impl<I> TakeAny<I>
+where
+ I: ParallelIterator,
+{
+ /// Creates a new `TakeAny` iterator.
+ pub(super) fn new(base: I, count: usize) -> Self {
+ TakeAny { base, count }
+ }
+}
+
+impl<I> ParallelIterator for TakeAny<I>
+where
+ I: ParallelIterator,
+{
+ type Item = I::Item;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ let consumer1 = TakeAnyConsumer {
+ base: consumer,
+ count: &AtomicUsize::new(self.count),
+ };
+ self.base.drive_unindexed(consumer1)
+ }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct TakeAnyConsumer<'f, C> {
+ base: C,
+ count: &'f AtomicUsize,
+}
+
+impl<'f, T, C> Consumer<T> for TakeAnyConsumer<'f, C>
+where
+ C: Consumer<T>,
+ T: Send,
+{
+ type Folder = TakeAnyFolder<'f, C::Folder>;
+ type Reducer = C::Reducer;
+ type Result = C::Result;
+
+ fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+ let (left, right, reducer) = self.base.split_at(index);
+ (
+ TakeAnyConsumer { base: left, ..self },
+ TakeAnyConsumer {
+ base: right,
+ ..self
+ },
+ reducer,
+ )
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ TakeAnyFolder {
+ base: self.base.into_folder(),
+ count: self.count,
+ }
+ }
+
+ fn full(&self) -> bool {
+ self.count.load(Ordering::Relaxed) == 0 || self.base.full()
+ }
+}
+
+impl<'f, T, C> UnindexedConsumer<T> for TakeAnyConsumer<'f, C>
+where
+ C: UnindexedConsumer<T>,
+ T: Send,
+{
+ fn split_off_left(&self) -> Self {
+ TakeAnyConsumer {
+ base: self.base.split_off_left(),
+ ..*self
+ }
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ self.base.to_reducer()
+ }
+}
+
+struct TakeAnyFolder<'f, C> {
+ base: C,
+ count: &'f AtomicUsize,
+}
+
+fn checked_decrement(u: &AtomicUsize) -> bool {
+ u.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |u| u.checked_sub(1))
+ .is_ok()
+}
+
+impl<'f, T, C> Folder<T> for TakeAnyFolder<'f, C>
+where
+ C: Folder<T>,
+{
+ type Result = C::Result;
+
+ fn consume(mut self, item: T) -> Self {
+ if checked_decrement(self.count) {
+ self.base = self.base.consume(item);
+ }
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.base = self.base.consume_iter(
+ iter.into_iter()
+ .take_while(move |_| checked_decrement(self.count)),
+ );
+ self
+ }
+
+ fn complete(self) -> C::Result {
+ self.base.complete()
+ }
+
+ fn full(&self) -> bool {
+ self.count.load(Ordering::Relaxed) == 0 || self.base.full()
+ }
+}
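A minimal usage sketch (not part of the patch): `take_any` guarantees only how many elements are yielded, not which ones, since each folder decrements the shared counter from wherever it happens to be working.

    use rayon::prelude::*;

    fn main() {
        // Up to 5 elements from anywhere in the iterator; since the input
        // has at least 5, exactly 5 are returned.
        let taken: Vec<i32> = (0..1000).into_par_iter().take_any(5).collect();
        assert_eq!(taken.len(), 5);
    }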
diff --git a/src/iter/take_any_while.rs b/src/iter/take_any_while.rs
new file mode 100644
index 0000000..e6a91af
--- /dev/null
+++ b/src/iter/take_any_while.rs
@@ -0,0 +1,166 @@
+use super::plumbing::*;
+use super::*;
+use std::fmt;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// `TakeAnyWhile` is an iterator that iterates over elements from anywhere in `I`
+/// until the predicate returns `false`.
+/// This struct is created by the [`take_any_while()`] method on [`ParallelIterator`].
+///
+/// [`take_any_while()`]: trait.ParallelIterator.html#method.take_any_while
+/// [`ParallelIterator`]: trait.ParallelIterator.html
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+#[derive(Clone)]
+pub struct TakeAnyWhile<I: ParallelIterator, P> {
+ base: I,
+ predicate: P,
+}
+
+impl<I: ParallelIterator + fmt::Debug, P> fmt::Debug for TakeAnyWhile<I, P> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("TakeAnyWhile")
+ .field("base", &self.base)
+ .finish()
+ }
+}
+
+impl<I, P> TakeAnyWhile<I, P>
+where
+ I: ParallelIterator,
+{
+ /// Creates a new `TakeAnyWhile` iterator.
+ pub(super) fn new(base: I, predicate: P) -> Self {
+ TakeAnyWhile { base, predicate }
+ }
+}
+
+impl<I, P> ParallelIterator for TakeAnyWhile<I, P>
+where
+ I: ParallelIterator,
+ P: Fn(&I::Item) -> bool + Sync + Send,
+{
+ type Item = I::Item;
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ let consumer1 = TakeAnyWhileConsumer {
+ base: consumer,
+ predicate: &self.predicate,
+ taking: &AtomicBool::new(true),
+ };
+ self.base.drive_unindexed(consumer1)
+ }
+}
+
+/// ////////////////////////////////////////////////////////////////////////
+/// Consumer implementation
+
+struct TakeAnyWhileConsumer<'p, C, P> {
+ base: C,
+ predicate: &'p P,
+ taking: &'p AtomicBool,
+}
+
+impl<'p, T, C, P> Consumer<T> for TakeAnyWhileConsumer<'p, C, P>
+where
+ C: Consumer<T>,
+ P: Fn(&T) -> bool + Sync,
+{
+ type Folder = TakeAnyWhileFolder<'p, C::Folder, P>;
+ type Reducer = C::Reducer;
+ type Result = C::Result;
+
+ fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) {
+ let (left, right, reducer) = self.base.split_at(index);
+ (
+ TakeAnyWhileConsumer { base: left, ..self },
+ TakeAnyWhileConsumer {
+ base: right,
+ ..self
+ },
+ reducer,
+ )
+ }
+
+ fn into_folder(self) -> Self::Folder {
+ TakeAnyWhileFolder {
+ base: self.base.into_folder(),
+ predicate: self.predicate,
+ taking: self.taking,
+ }
+ }
+
+ fn full(&self) -> bool {
+ !self.taking.load(Ordering::Relaxed) || self.base.full()
+ }
+}
+
+impl<'p, T, C, P> UnindexedConsumer<T> for TakeAnyWhileConsumer<'p, C, P>
+where
+ C: UnindexedConsumer<T>,
+ P: Fn(&T) -> bool + Sync,
+{
+ fn split_off_left(&self) -> Self {
+ TakeAnyWhileConsumer {
+ base: self.base.split_off_left(),
+ ..*self
+ }
+ }
+
+ fn to_reducer(&self) -> Self::Reducer {
+ self.base.to_reducer()
+ }
+}
+
+struct TakeAnyWhileFolder<'p, C, P> {
+ base: C,
+ predicate: &'p P,
+ taking: &'p AtomicBool,
+}
+
+fn take<T>(item: &T, taking: &AtomicBool, predicate: &impl Fn(&T) -> bool) -> bool {
+ if !taking.load(Ordering::Relaxed) {
+ return false;
+ }
+ if predicate(item) {
+ return true;
+ }
+ taking.store(false, Ordering::Relaxed);
+ false
+}
+
+impl<'p, T, C, P> Folder<T> for TakeAnyWhileFolder<'p, C, P>
+where
+ C: Folder<T>,
+ P: Fn(&T) -> bool + 'p,
+{
+ type Result = C::Result;
+
+ fn consume(mut self, item: T) -> Self {
+ if take(&item, self.taking, self.predicate) {
+ self.base = self.base.consume(item);
+ }
+ self
+ }
+
+ fn consume_iter<I>(mut self, iter: I) -> Self
+ where
+ I: IntoIterator<Item = T>,
+ {
+ self.base = self.base.consume_iter(
+ iter.into_iter()
+ .take_while(move |x| take(x, self.taking, self.predicate)),
+ );
+ self
+ }
+
+ fn complete(self) -> C::Result {
+ self.base.complete()
+ }
+
+ fn full(&self) -> bool {
+ !self.taking.load(Ordering::Relaxed) || self.base.full()
+ }
+}
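A minimal usage sketch (not part of the patch): elements that fail the predicate are never yielded, so every result satisfies it, though fewer elements may be returned than a sequential `take_while` would keep once the shared flag flips.

    use rayon::prelude::*;

    fn main() {
        let taken: Vec<i32> = (0..100)
            .into_par_iter()
            .take_any_while(|&x| x < 50)
            .collect();
        assert!(taken.iter().all(|&x| x < 50));
    }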
diff --git a/src/iter/test.rs b/src/iter/test.rs
index bc5106b..c72068d 100644
--- a/src/iter/test.rs
+++ b/src/iter/test.rs
@@ -117,13 +117,10 @@ fn fold_map_reduce() {
let r1 = (0_i32..32)
.into_par_iter()
.with_max_len(1)
- .fold(
- || vec![],
- |mut v, e| {
- v.push(e);
- v
- },
- )
+ .fold(Vec::new, |mut v, e| {
+ v.push(e);
+ v
+ })
.map(|v| vec![v])
.reduce_with(|mut v_a, v_b| {
v_a.extend(v_b);
@@ -394,7 +391,7 @@ fn check_slice_mut_indexed() {
#[test]
fn check_vec_indexed() {
let a = vec![1, 2, 3];
- is_indexed(a.clone().into_par_iter());
+ is_indexed(a.into_par_iter());
}
#[test]
@@ -471,6 +468,7 @@ fn check_cmp_gt_to_seq() {
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn check_cmp_short_circuit() {
// We only use a single thread in order to make the short-circuit behavior deterministic.
let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
@@ -500,6 +498,7 @@ fn check_cmp_short_circuit() {
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn check_partial_cmp_short_circuit() {
// We only use a single thread to make the short-circuit behavior deterministic.
let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
@@ -529,6 +528,7 @@ fn check_partial_cmp_short_circuit() {
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn check_partial_cmp_nan_short_circuit() {
// We only use a single thread to make the short-circuit behavior deterministic.
let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
@@ -1371,10 +1371,10 @@ fn check_find_is_present() {
let counter = AtomicUsize::new(0);
let value: Option<i32> = (0_i32..2048).into_par_iter().find_any(|&p| {
counter.fetch_add(1, Ordering::SeqCst);
- p >= 1024 && p < 1096
+ (1024..1096).contains(&p)
});
let q = value.unwrap();
- assert!(q >= 1024 && q < 1096);
+ assert!((1024..1096).contains(&q));
assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one
}
@@ -1892,7 +1892,7 @@ fn check_either() {
// try an indexed iterator
let left: E = Either::Left(v.clone().into_par_iter());
- assert!(left.enumerate().eq(v.clone().into_par_iter().enumerate()));
+ assert!(left.enumerate().eq(v.into_par_iter().enumerate()));
}
#[test]
@@ -2063,7 +2063,7 @@ fn check_chunks_len() {
assert_eq!(4, (0..8).into_par_iter().chunks(2).len());
assert_eq!(3, (0..9).into_par_iter().chunks(3).len());
assert_eq!(3, (0..8).into_par_iter().chunks(3).len());
- assert_eq!(1, (&[1]).par_iter().chunks(3).len());
+ assert_eq!(1, [1].par_iter().chunks(3).len());
assert_eq!(0, (0..0).into_par_iter().chunks(3).len());
}
diff --git a/src/iter/try_fold.rs b/src/iter/try_fold.rs
index c1a57a4..6d1048d 100644
--- a/src/iter/try_fold.rs
+++ b/src/iter/try_fold.rs
@@ -1,15 +1,16 @@
use super::plumbing::*;
-use super::*;
-use super::private::Try;
+use super::ParallelIterator;
+use super::Try;
use std::fmt::{self, Debug};
use std::marker::PhantomData;
+use std::ops::ControlFlow::{self, Break, Continue};
impl<U, I, ID, F> TryFold<I, U, ID, F>
where
I: ParallelIterator,
- F: Fn(U::Ok, I::Item) -> U + Sync + Send,
- ID: Fn() -> U::Ok + Sync + Send,
+ F: Fn(U::Output, I::Item) -> U + Sync + Send,
+ ID: Fn() -> U::Output + Sync + Send,
U: Try + Send,
{
pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self {
@@ -45,8 +46,8 @@ impl<U, I: ParallelIterator + Debug, ID, F> Debug for TryFold<I, U, ID, F> {
impl<U, I, ID, F> ParallelIterator for TryFold<I, U, ID, F>
where
I: ParallelIterator,
- F: Fn(U::Ok, I::Item) -> U + Sync + Send,
- ID: Fn() -> U::Ok + Sync + Send,
+ F: Fn(U::Output, I::Item) -> U + Sync + Send,
+ ID: Fn() -> U::Output + Sync + Send,
U: Try + Send,
{
type Item = U;
@@ -75,8 +76,8 @@ struct TryFoldConsumer<'c, U, C, ID, F> {
impl<'r, U, T, C, ID, F> Consumer<T> for TryFoldConsumer<'r, U, C, ID, F>
where
C: Consumer<U>,
- F: Fn(U::Ok, T) -> U + Sync,
- ID: Fn() -> U::Ok + Sync,
+ F: Fn(U::Output, T) -> U + Sync,
+ ID: Fn() -> U::Output + Sync,
U: Try + Send,
{
type Folder = TryFoldFolder<'r, C::Folder, U, F>;
@@ -98,7 +99,7 @@ where
fn into_folder(self) -> Self::Folder {
TryFoldFolder {
base: self.base.into_folder(),
- result: Ok((self.identity)()),
+ control: Continue((self.identity)()),
fold_op: self.fold_op,
}
}
@@ -111,8 +112,8 @@ where
impl<'r, U, T, C, ID, F> UnindexedConsumer<T> for TryFoldConsumer<'r, U, C, ID, F>
where
C: UnindexedConsumer<U>,
- F: Fn(U::Ok, T) -> U + Sync,
- ID: Fn() -> U::Ok + Sync,
+ F: Fn(U::Output, T) -> U + Sync,
+ ID: Fn() -> U::Output + Sync,
U: Try + Send,
{
fn split_off_left(&self) -> Self {
@@ -130,35 +131,38 @@ where
struct TryFoldFolder<'r, C, U: Try, F> {
base: C,
fold_op: &'r F,
- result: Result<U::Ok, U::Error>,
+ control: ControlFlow<U::Residual, U::Output>,
}
impl<'r, C, U, F, T> Folder<T> for TryFoldFolder<'r, C, U, F>
where
C: Folder<U>,
- F: Fn(U::Ok, T) -> U + Sync,
+ F: Fn(U::Output, T) -> U + Sync,
U: Try,
{
type Result = C::Result;
fn consume(mut self, item: T) -> Self {
let fold_op = self.fold_op;
- if let Ok(acc) = self.result {
- self.result = fold_op(acc, item).into_result();
+ if let Continue(acc) = self.control {
+ self.control = fold_op(acc, item).branch();
}
self
}
fn complete(self) -> C::Result {
- let item = match self.result {
- Ok(ok) => U::from_ok(ok),
- Err(error) => U::from_error(error),
+ let item = match self.control {
+ Continue(c) => U::from_output(c),
+ Break(r) => U::from_residual(r),
};
self.base.consume(item).complete()
}
fn full(&self) -> bool {
- self.result.is_err() || self.base.full()
+ match self.control {
+ Break(_) => true,
+ _ => self.base.full(),
+ }
}
}
@@ -167,11 +171,11 @@ where
impl<U, I, F> TryFoldWith<I, U, F>
where
I: ParallelIterator,
- F: Fn(U::Ok, I::Item) -> U + Sync,
+ F: Fn(U::Output, I::Item) -> U + Sync,
U: Try + Send,
- U::Ok: Clone + Send,
+ U::Output: Clone + Send,
{
- pub(super) fn new(base: I, item: U::Ok, fold_op: F) -> Self {
+ pub(super) fn new(base: I, item: U::Output, fold_op: F) -> Self {
TryFoldWith {
base,
item,
@@ -189,13 +193,13 @@ where
#[derive(Clone)]
pub struct TryFoldWith<I, U: Try, F> {
base: I,
- item: U::Ok,
+ item: U::Output,
fold_op: F,
}
impl<I: ParallelIterator + Debug, U: Try, F> Debug for TryFoldWith<I, U, F>
where
- U::Ok: Debug,
+ U::Output: Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("TryFoldWith")
@@ -208,9 +212,9 @@ where
impl<U, I, F> ParallelIterator for TryFoldWith<I, U, F>
where
I: ParallelIterator,
- F: Fn(U::Ok, I::Item) -> U + Sync + Send,
+ F: Fn(U::Output, I::Item) -> U + Sync + Send,
U: Try + Send,
- U::Ok: Clone + Send,
+ U::Output: Clone + Send,
{
type Item = U;
@@ -229,16 +233,16 @@ where
struct TryFoldWithConsumer<'c, C, U: Try, F> {
base: C,
- item: U::Ok,
+ item: U::Output,
fold_op: &'c F,
}
impl<'r, U, T, C, F> Consumer<T> for TryFoldWithConsumer<'r, C, U, F>
where
C: Consumer<U>,
- F: Fn(U::Ok, T) -> U + Sync,
+ F: Fn(U::Output, T) -> U + Sync,
U: Try + Send,
- U::Ok: Clone + Send,
+ U::Output: Clone + Send,
{
type Folder = TryFoldFolder<'r, C::Folder, U, F>;
type Reducer = C::Reducer;
@@ -263,7 +267,7 @@ where
fn into_folder(self) -> Self::Folder {
TryFoldFolder {
base: self.base.into_folder(),
- result: Ok(self.item),
+ control: Continue(self.item),
fold_op: self.fold_op,
}
}
@@ -276,9 +280,9 @@ where
impl<'r, U, T, C, F> UnindexedConsumer<T> for TryFoldWithConsumer<'r, C, U, F>
where
C: UnindexedConsumer<U>,
- F: Fn(U::Ok, T) -> U + Sync,
+ F: Fn(U::Output, T) -> U + Sync,
U: Try + Send,
- U::Ok: Clone + Send,
+ U::Output: Clone + Send,
{
fn split_off_left(&self) -> Self {
TryFoldWithConsumer {
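The switch from `Result` to `ControlFlow` above is internal; the user-facing `try_fold` semantics are unchanged. A short sketch of the short-circuiting fold it implements (not part of the patch):

    use rayon::prelude::*;

    fn main() {
        // Each worker folds its chunk into a Result; the first Err (a Break
        // residual internally) marks the folder full and stops further work.
        let sums: Result<Vec<u32>, &str> = (1..=100u32)
            .into_par_iter()
            .try_fold(|| 0u32, |acc, x| acc.checked_add(x).ok_or("overflow"))
            .collect();
        assert!(sums.is_ok());
    }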
diff --git a/src/iter/try_reduce.rs b/src/iter/try_reduce.rs
index 76b3850..35a724c 100644
--- a/src/iter/try_reduce.rs
+++ b/src/iter/try_reduce.rs
@@ -1,14 +1,15 @@
use super::plumbing::*;
use super::ParallelIterator;
-use super::private::Try;
+use super::Try;
+use std::ops::ControlFlow::{self, Break, Continue};
use std::sync::atomic::{AtomicBool, Ordering};
pub(super) fn try_reduce<PI, R, ID, T>(pi: PI, identity: ID, reduce_op: R) -> T
where
PI: ParallelIterator<Item = T>,
- R: Fn(T::Ok, T::Ok) -> T + Sync,
- ID: Fn() -> T::Ok + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
+ ID: Fn() -> T::Output + Sync,
T: Try + Send,
{
let full = AtomicBool::new(false);
@@ -36,8 +37,8 @@ impl<'r, R, ID> Clone for TryReduceConsumer<'r, R, ID> {
impl<'r, R, ID, T> Consumer<T> for TryReduceConsumer<'r, R, ID>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
- ID: Fn() -> T::Ok + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
+ ID: Fn() -> T::Output + Sync,
T: Try + Send,
{
type Folder = TryReduceFolder<'r, R, T>;
@@ -51,7 +52,7 @@ where
fn into_folder(self) -> Self::Folder {
TryReduceFolder {
reduce_op: self.reduce_op,
- result: Ok((self.identity)()),
+ control: Continue((self.identity)()),
full: self.full,
}
}
@@ -63,8 +64,8 @@ where
impl<'r, R, ID, T> UnindexedConsumer<T> for TryReduceConsumer<'r, R, ID>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
- ID: Fn() -> T::Ok + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
+ ID: Fn() -> T::Output + Sync,
T: Try + Send,
{
fn split_off_left(&self) -> Self {
@@ -78,52 +79,53 @@ where
impl<'r, R, ID, T> Reducer<T> for TryReduceConsumer<'r, R, ID>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
T: Try,
{
fn reduce(self, left: T, right: T) -> T {
- match (left.into_result(), right.into_result()) {
- (Ok(left), Ok(right)) => (self.reduce_op)(left, right),
- (Err(e), _) | (_, Err(e)) => T::from_error(e),
+ match (left.branch(), right.branch()) {
+ (Continue(left), Continue(right)) => (self.reduce_op)(left, right),
+ (Break(r), _) | (_, Break(r)) => T::from_residual(r),
}
}
}
struct TryReduceFolder<'r, R, T: Try> {
reduce_op: &'r R,
- result: Result<T::Ok, T::Error>,
+ control: ControlFlow<T::Residual, T::Output>,
full: &'r AtomicBool,
}
impl<'r, R, T> Folder<T> for TryReduceFolder<'r, R, T>
where
- R: Fn(T::Ok, T::Ok) -> T,
+ R: Fn(T::Output, T::Output) -> T,
T: Try,
{
type Result = T;
fn consume(mut self, item: T) -> Self {
let reduce_op = self.reduce_op;
- if let Ok(left) = self.result {
- self.result = match item.into_result() {
- Ok(right) => reduce_op(left, right).into_result(),
- Err(error) => Err(error),
- };
- }
- if self.result.is_err() {
- self.full.store(true, Ordering::Relaxed)
+ self.control = match (self.control, item.branch()) {
+ (Continue(left), Continue(right)) => reduce_op(left, right).branch(),
+ (control @ Break(_), _) | (_, control @ Break(_)) => control,
+ };
+ if let Break(_) = self.control {
+ self.full.store(true, Ordering::Relaxed);
}
self
}
fn complete(self) -> T {
- match self.result {
- Ok(ok) => T::from_ok(ok),
- Err(error) => T::from_error(error),
+ match self.control {
+ Continue(c) => T::from_output(c),
+ Break(r) => T::from_residual(r),
}
}
fn full(&self) -> bool {
- self.full.load(Ordering::Relaxed)
+ match self.control {
+ Break(_) => true,
+ _ => self.full.load(Ordering::Relaxed),
+ }
}
}
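Likewise for `try_reduce` (sketch, not part of the patch): the reduce operator is fallible, and a `Break` residual both short-circuits the local folder and sets the shared `full` flag so other workers stop early.

    use rayon::prelude::*;

    fn main() {
        let total: Result<u32, &str> = (1..=1000u32)
            .into_par_iter()
            .map(Ok)
            .try_reduce(|| 0, |a, b| a.checked_add(b).ok_or("overflow"));
        assert_eq!(total, Ok(500_500));
    }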
diff --git a/src/iter/try_reduce_with.rs b/src/iter/try_reduce_with.rs
index 6be3100..cd7c83e 100644
--- a/src/iter/try_reduce_with.rs
+++ b/src/iter/try_reduce_with.rs
@@ -1,13 +1,14 @@
use super::plumbing::*;
use super::ParallelIterator;
-use super::private::Try;
+use super::Try;
+use std::ops::ControlFlow::{self, Break, Continue};
use std::sync::atomic::{AtomicBool, Ordering};
pub(super) fn try_reduce_with<PI, R, T>(pi: PI, reduce_op: R) -> Option<T>
where
PI: ParallelIterator<Item = T>,
- R: Fn(T::Ok, T::Ok) -> T + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
T: Try + Send,
{
let full = AtomicBool::new(false);
@@ -33,7 +34,7 @@ impl<'r, R> Clone for TryReduceWithConsumer<'r, R> {
impl<'r, R, T> Consumer<T> for TryReduceWithConsumer<'r, R>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
T: Try + Send,
{
type Folder = TryReduceWithFolder<'r, R, T>;
@@ -47,7 +48,7 @@ where
fn into_folder(self) -> Self::Folder {
TryReduceWithFolder {
reduce_op: self.reduce_op,
- opt_result: None,
+ opt_control: None,
full: self.full,
}
}
@@ -59,7 +60,7 @@ where
impl<'r, R, T> UnindexedConsumer<T> for TryReduceWithConsumer<'r, R>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
T: Try + Send,
{
fn split_off_left(&self) -> Self {
@@ -73,62 +74,59 @@ where
impl<'r, R, T> Reducer<Option<T>> for TryReduceWithConsumer<'r, R>
where
- R: Fn(T::Ok, T::Ok) -> T + Sync,
+ R: Fn(T::Output, T::Output) -> T + Sync,
T: Try,
{
fn reduce(self, left: Option<T>, right: Option<T>) -> Option<T> {
let reduce_op = self.reduce_op;
match (left, right) {
- (None, x) | (x, None) => x,
- (Some(a), Some(b)) => match (a.into_result(), b.into_result()) {
- (Ok(a), Ok(b)) => Some(reduce_op(a, b)),
- (Err(e), _) | (_, Err(e)) => Some(T::from_error(e)),
+ (Some(left), Some(right)) => match (left.branch(), right.branch()) {
+ (Continue(left), Continue(right)) => Some(reduce_op(left, right)),
+ (Break(r), _) | (_, Break(r)) => Some(T::from_residual(r)),
},
+ (None, x) | (x, None) => x,
}
}
}
struct TryReduceWithFolder<'r, R, T: Try> {
reduce_op: &'r R,
- opt_result: Option<Result<T::Ok, T::Error>>,
+ opt_control: Option<ControlFlow<T::Residual, T::Output>>,
full: &'r AtomicBool,
}
impl<'r, R, T> Folder<T> for TryReduceWithFolder<'r, R, T>
where
- R: Fn(T::Ok, T::Ok) -> T,
+ R: Fn(T::Output, T::Output) -> T,
T: Try,
{
type Result = Option<T>;
- fn consume(self, item: T) -> Self {
+ fn consume(mut self, item: T) -> Self {
let reduce_op = self.reduce_op;
- let result = match self.opt_result {
- None => item.into_result(),
- Some(Ok(a)) => match item.into_result() {
- Ok(b) => reduce_op(a, b).into_result(),
- Err(e) => Err(e),
- },
- Some(Err(e)) => Err(e),
+ let control = match (self.opt_control, item.branch()) {
+ (Some(Continue(left)), Continue(right)) => reduce_op(left, right).branch(),
+ (Some(control @ Break(_)), _) | (_, control) => control,
};
- if result.is_err() {
+ if let Break(_) = control {
self.full.store(true, Ordering::Relaxed)
}
- TryReduceWithFolder {
- opt_result: Some(result),
- ..self
- }
+ self.opt_control = Some(control);
+ self
}
fn complete(self) -> Option<T> {
- let result = self.opt_result?;
- Some(match result {
- Ok(ok) => T::from_ok(ok),
- Err(error) => T::from_error(error),
- })
+ match self.opt_control {
+ Some(Continue(c)) => Some(T::from_output(c)),
+ Some(Break(r)) => Some(T::from_residual(r)),
+ None => None,
+ }
}
fn full(&self) -> bool {
- self.full.load(Ordering::Relaxed)
+ match self.opt_control {
+ Some(Break(_)) => true,
+ _ => self.full.load(Ordering::Relaxed),
+ }
}
}
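And `try_reduce_with` (sketch, not part of the patch), which takes no identity function and therefore yields `None` on empty input:

    use rayon::prelude::*;

    fn main() {
        let max: Option<Result<i32, &str>> = vec![1, 5, 3]
            .into_par_iter()
            .map(Ok)
            .try_reduce_with(|a, b| Ok(a.max(b)));
        assert_eq!(max, Some(Ok(5)));
    }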
diff --git a/src/iter/update.rs b/src/iter/update.rs
index 373a4d7..c693ac8 100644
--- a/src/iter/update.rs
+++ b/src/iter/update.rs
@@ -210,7 +210,7 @@ where
F: Fn(&mut T) + Send + Sync,
{
fn split_off_left(&self) -> Self {
- UpdateConsumer::new(self.base.split_off_left(), &self.update_op)
+ UpdateConsumer::new(self.base.split_off_left(), self.update_op)
}
fn to_reducer(&self) -> Self::Reducer {
diff --git a/src/lib.rs b/src/lib.rs
index b1e6fab..86f997b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,3 @@
-#![doc(html_root_url = "https://docs.rs/rayon/1.5")]
#![deny(missing_debug_implementations)]
#![deny(missing_docs)]
#![deny(unreachable_pub)]
@@ -77,6 +76,11 @@
//! [the `collections` from `std`]: https://doc.rust-lang.org/std/collections/index.html
//! [`std`]: https://doc.rust-lang.org/std/
//!
+//! # Targets without threading
+//!
+//! Rayon has limited support for targets without `std` threading implementations.
+//! See the [`rayon_core`] documentation for more information about its global fallback.
+//!
//! # Other questions?
//!
//! See [the Rayon FAQ][faq].
@@ -114,8 +118,43 @@ pub use rayon_core::ThreadBuilder;
pub use rayon_core::ThreadPool;
pub use rayon_core::ThreadPoolBuildError;
pub use rayon_core::ThreadPoolBuilder;
-pub use rayon_core::{current_num_threads, current_thread_index};
+pub use rayon_core::{broadcast, spawn_broadcast, BroadcastContext};
+pub use rayon_core::{current_num_threads, current_thread_index, max_num_threads};
pub use rayon_core::{in_place_scope, scope, Scope};
pub use rayon_core::{in_place_scope_fifo, scope_fifo, ScopeFifo};
pub use rayon_core::{join, join_context};
pub use rayon_core::{spawn, spawn_fifo};
+pub use rayon_core::{yield_local, yield_now, Yield};
+
+/// We need to transmit raw pointers across threads. It is possible to do this
+/// without any unsafe code by converting pointers to usize or to AtomicPtr<T>
+/// then back to a raw pointer for use. We prefer this approach because code
+/// that uses this type is more explicit.
+///
+/// Unsafe code is still required to dereference the pointer, so this type is
+/// not unsound on its own, although it does partly lift the unconditional
+/// !Send and !Sync on raw pointers. As always, dereference with care.
+struct SendPtr<T>(*mut T);
+
+// SAFETY: !Send for raw pointers is not for safety, just as a lint
+unsafe impl<T: Send> Send for SendPtr<T> {}
+
+// SAFETY: !Sync for raw pointers is not for safety, just as a lint
+unsafe impl<T: Send> Sync for SendPtr<T> {}
+
+impl<T> SendPtr<T> {
+ // Helper to avoid disjoint captures of `send_ptr.0`
+ fn get(self) -> *mut T {
+ self.0
+ }
+}
+
+// Implement Clone without the T: Clone bound from the derive
+impl<T> Clone for SendPtr<T> {
+ fn clone(&self) -> Self {
+ Self(self.0)
+ }
+}
+
+// Implement Copy without the T: Copy bound from the derive
+impl<T> Copy for SendPtr<T> {}
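A hedged sketch of the pattern `SendPtr` enables (hypothetical standalone code; the type above is private to rayon): two `join` closures writing disjoint halves of a buffer through copies of the same wrapped pointer.

    struct SendPtr<T>(*mut T);

    // SAFETY: sending the pointer is fine; dereferencing it is what's unsafe.
    unsafe impl<T: Send> Send for SendPtr<T> {}

    impl<T> SendPtr<T> {
        fn get(self) -> *mut T {
            self.0
        }
    }

    impl<T> Clone for SendPtr<T> {
        fn clone(&self) -> Self {
            Self(self.0)
        }
    }

    impl<T> Copy for SendPtr<T> {}

    fn fill_halves(buf: &mut [u64]) {
        let (mid, len) = (buf.len() / 2, buf.len());
        let ptr = SendPtr(buf.as_mut_ptr());
        rayon::join(
            move || {
                for i in 0..mid {
                    // SAFETY: this closure writes only indices [0, mid).
                    unsafe { *ptr.get().add(i) = i as u64 };
                }
            },
            move || {
                for i in mid..len {
                    // SAFETY: this closure writes only indices [mid, len).
                    unsafe { *ptr.get().add(i) = i as u64 };
                }
            },
        );
    }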
diff --git a/src/math.rs b/src/math.rs
index 9de5889..3a630be 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -1,7 +1,7 @@
use std::ops::{Bound, Range, RangeBounds};
/// Divide `n` by `divisor`, and round up to the nearest integer
-/// if not evenly divisable.
+/// if not evenly divisible.
#[inline]
pub(super) fn div_round_up(n: usize, divisor: usize) -> usize {
debug_assert!(divisor != 0, "Division by zero!");
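The rest of the function body is not shown in this hunk; an equivalent formulation for illustration, assuming the usual `(n - 1) / divisor + 1` implementation:

    fn div_round_up(n: usize, divisor: usize) -> usize {
        debug_assert!(divisor != 0, "Division by zero!");
        if n == 0 {
            0
        } else {
            (n - 1) / divisor + 1
        }
    }

    fn main() {
        assert_eq!(div_round_up(10, 3), 4); // rounds up
        assert_eq!(div_round_up(9, 3), 3);  // evenly divisible
        assert_eq!(div_round_up(0, 3), 0);
    }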
diff --git a/src/slice/chunks.rs b/src/slice/chunks.rs
new file mode 100644
index 0000000..a3cc709
--- /dev/null
+++ b/src/slice/chunks.rs
@@ -0,0 +1,389 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::div_round_up;
+use std::cmp;
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct Chunks<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: Sync> Chunks<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data [T]) -> Self {
+ Self { chunk_size, slice }
+ }
+}
+
+impl<'data, T: Sync> Clone for Chunks<'data, T> {
+ fn clone(&self) -> Self {
+ Chunks { ..*self }
+ }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for Chunks<'data, T> {
+ type Item = &'data [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for Chunks<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ div_round_up(self.slice.len(), self.chunk_size)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(ChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct ChunksProducer<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksProducer<'data, T> {
+ type Item = &'data [T];
+ type IntoIter = ::std::slice::Chunks<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.chunks(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+ let (left, right) = self.slice.split_at(elem_index);
+ (
+ ChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ ChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ )
+ }
+}
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExact<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+ rem: &'data [T],
+}
+
+impl<'data, T: Sync> ChunksExact<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data [T]) -> Self {
+ let rem_len = slice.len() % chunk_size;
+ let len = slice.len() - rem_len;
+ let (slice, rem) = slice.split_at(len);
+ Self {
+ chunk_size,
+ slice,
+ rem,
+ }
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ pub fn remainder(&self) -> &'data [T] {
+ self.rem
+ }
+}
+
+impl<'data, T: Sync> Clone for ChunksExact<'data, T> {
+ fn clone(&self) -> Self {
+ ChunksExact { ..*self }
+ }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for ChunksExact<'data, T> {
+ type Item = &'data [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for ChunksExact<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ self.slice.len() / self.chunk_size
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(ChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct ChunksExactProducer<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for ChunksExactProducer<'data, T> {
+ type Item = &'data [T];
+ type IntoIter = ::std::slice::ChunksExact<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.chunks_exact(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = index * self.chunk_size;
+ let (left, right) = self.slice.split_at(elem_index);
+ (
+ ChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ ChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ )
+ }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksMut<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: Send> ChunksMut<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data mut [T]) -> Self {
+ Self { chunk_size, slice }
+ }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksMut<'data, T> {
+ type Item = &'data mut [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksMut<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ div_round_up(self.slice.len(), self.chunk_size)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(ChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct ChunksMutProducer<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksMutProducer<'data, T> {
+ type Item = &'data mut [T];
+ type IntoIter = ::std::slice::ChunksMut<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.chunks_mut(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
+ let (left, right) = self.slice.split_at_mut(elem_index);
+ (
+ ChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ ChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ )
+ }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice
+#[derive(Debug)]
+pub struct ChunksExactMut<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+ rem: &'data mut [T],
+}
+
+impl<'data, T: Send> ChunksExactMut<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data mut [T]) -> Self {
+ let rem_len = slice.len() % chunk_size;
+ let len = slice.len() - rem_len;
+ let (slice, rem) = slice.split_at_mut(len);
+ Self {
+ chunk_size,
+ slice,
+ rem,
+ }
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ ///
+ /// Note that this has to consume `self` to return the original lifetime of
+ /// the data, which prevents this from actually being used as a parallel
+ /// iterator since that also consumes. This method is provided for parity
+ /// with `std::slice::ChunksExactMut`, but consider calling `remainder()` or
+ /// `take_remainder()` as alternatives.
+ pub fn into_remainder(self) -> &'data mut [T] {
+ self.rem
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ ///
+ /// Consider `take_remainder()` if you need access to the data with its
+ /// original lifetime, rather than borrowing through `&mut self` here.
+ pub fn remainder(&mut self) -> &mut [T] {
+ self.rem
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements. Subsequent calls will return an empty slice.
+ pub fn take_remainder(&mut self) -> &'data mut [T] {
+ std::mem::take(&mut self.rem)
+ }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for ChunksExactMut<'data, T> {
+ type Item = &'data mut [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksExactMut<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ self.slice.len() / self.chunk_size
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(ChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct ChunksExactMutProducer<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for ChunksExactMutProducer<'data, T> {
+ type Item = &'data mut [T];
+ type IntoIter = ::std::slice::ChunksExactMut<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.chunks_exact_mut(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = index * self.chunk_size;
+ let (left, right) = self.slice.split_at_mut(elem_index);
+ (
+ ChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ ChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ )
+ }
+}
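A quick usage sketch of the relocated chunk iterators (not part of the patch):

    use rayon::prelude::*;

    fn main() {
        let data = [1, 2, 3, 4, 5, 6, 7];

        // `par_chunks` yields a short final chunk...
        let sums: Vec<i32> = data.par_chunks(3).map(|c| c.iter().sum()).collect();
        assert_eq!(sums, [6, 15, 7]);

        // ...while `par_chunks_exact` sets it aside as the remainder.
        let exact = data.par_chunks_exact(3);
        assert_eq!(exact.remainder(), &[7]);
        assert_eq!(exact.len(), 2);
    }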
diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs
index e9a5d43..fec309d 100644
--- a/src/slice/mergesort.rs
+++ b/src/slice/mergesort.rs
@@ -6,6 +6,7 @@
use crate::iter::*;
use crate::slice::ParallelSliceMut;
+use crate::SendPtr;
use std::mem;
use std::mem::size_of;
use std::ptr;
@@ -24,7 +25,7 @@ unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
- src: *mut T,
+ src: *const T,
dest: *mut T,
len: usize,
}
@@ -63,9 +64,7 @@ where
// performance than with the 2nd method.
//
// All methods were benchmarked, and the 3rd showed best results. So we chose that one.
- let mut tmp = NoDrop {
- value: Some(ptr::read(&v[0])),
- };
+ let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
// Intermediate state of the insertion process is always tracked by `hole`, which
// serves two purposes:
@@ -78,13 +77,13 @@ where
// fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
// initially held exactly once.
let mut hole = InsertionHole {
- src: tmp.value.as_mut().unwrap(),
+ src: &*tmp,
dest: &mut v[1],
};
ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
for i in 2..v.len() {
- if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
+ if !is_less(&v[i], &*tmp) {
break;
}
ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
@@ -94,20 +93,9 @@ where
}
}
- // Holds a value, but never drops it.
- struct NoDrop<T> {
- value: Option<T>,
- }
-
- impl<T> Drop for NoDrop<T> {
- fn drop(&mut self) {
- mem::forget(self.value.take());
- }
- }
-
// When dropped, copies from `src` into `dest`.
struct InsertionHole<T> {
- src: *mut T,
+ src: *const T,
dest: *mut T,
}
@@ -217,8 +205,8 @@ where
impl<T> Drop for MergeHole<T> {
fn drop(&mut self) {
// `T` is not a zero-sized type, so it's okay to divide by its size.
- let len = (self.end as usize - self.start as usize) / size_of::<T>();
unsafe {
+ let len = self.end.offset_from(self.start) as usize;
ptr::copy_nonoverlapping(self.start, self.dest, len);
}
}
@@ -284,7 +272,7 @@ fn collapse(runs: &[Run]) -> Option<usize> {
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt).
+/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
@@ -294,7 +282,7 @@ fn collapse(runs: &[Run]) -> Option<usize> {
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
-/// The invariants ensure that the total running time is `O(n log n)` worst-case.
+/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
///
/// # Safety
///
@@ -497,12 +485,13 @@ where
// get copied into `dest_left` and `dest_right``.
mem::forget(s);
- // Convert the pointers to `usize` because `*mut T` is not `Send`.
- let dest_l = dest as usize;
- let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
+ // Wrap pointers in SendPtr so that they can be sent to another thread
+ // See the documentation of SendPtr for a full explanation
+ let dest_l = SendPtr(dest);
+ let dest_r = SendPtr(dest.add(left_l.len() + right_l.len()));
rayon_core::join(
- || par_merge(left_l, right_l, dest_l as *mut T, is_less),
- || par_merge(left_r, right_r, dest_r as *mut T, is_less),
+ move || par_merge(left_l, right_l, dest_l.get(), is_less),
+ move || par_merge(left_r, right_r, dest_r.get(), is_less),
);
}
// Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
@@ -580,7 +569,7 @@ unsafe fn recurse<T, F>(
// After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
// `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
- // merge chunks from `v` into `buf`, and viceversa.
+ // merge chunks from `v` into `buf`, and vice versa.
//
// Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
// merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
@@ -598,12 +587,13 @@ unsafe fn recurse<T, F>(
len: end - start,
};
- // Convert the pointers to `usize` because `*mut T` is not `Send`.
- let v = v as usize;
- let buf = buf as usize;
+ // Wrap pointers in SendPtr so that they can be sent to another thread
+ // See the documentation of SendPtr for a full explanation
+ let v = SendPtr(v);
+ let buf = SendPtr(buf);
rayon_core::join(
- || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
- || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
+ move || recurse(v.get(), buf.get(), left, !into_buf, is_less),
+ move || recurse(v.get(), buf.get(), right, !into_buf, is_less),
);
// Everything went all right - recursive calls didn't panic.
@@ -667,18 +657,20 @@ where
// Split the slice into chunks and merge sort them in parallel.
// However, descending chunks will not be sorted - they will be simply left intact.
let mut iter = {
- // Convert the pointer to `usize` because `*mut T` is not `Send`.
- let buf = buf as usize;
+ // Wrap pointer in SendPtr so that it can be sent to another thread
+ // See the documentation of SendPtr for a full explanation
+ let buf = SendPtr(buf);
+ let is_less = &is_less;
v.par_chunks_mut(CHUNK_LENGTH)
.with_max_len(1)
.enumerate()
- .map(|(i, chunk)| {
+ .map(move |(i, chunk)| {
let l = CHUNK_LENGTH * i;
let r = l + chunk.len();
unsafe {
- let buf = (buf as *mut T).add(l);
- (l, r, mergesort(chunk, buf, &is_less))
+ let buf = buf.get().add(l);
+ (l, r, mergesort(chunk, buf, is_less))
}
})
.collect::<Vec<_>>()
@@ -739,7 +731,7 @@ mod tests {
check(&[1, 2, 2, 2, 2, 3], &[]);
check(&[], &[1, 2, 2, 2, 2, 3]);
- let ref mut rng = thread_rng();
+ let rng = &mut thread_rng();
for _ in 0..100 {
let limit: u32 = rng.gen_range(1..21);
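Background on the `offset_from` change in `MergeHole::drop` above (standalone check, not from the patch): the method returns the distance in elements, so the manual byte subtraction and division by `size_of::<T>()` are no longer needed.

    fn main() {
        let a = [1u32, 2, 3, 4];
        let start = a.as_ptr();
        // SAFETY: both pointers are within (or one past) the same array.
        let end = unsafe { start.add(a.len()) };
        let len = unsafe { end.offset_from(start) } as usize;
        assert_eq!(len, a.len()); // element count, not byte count
    }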
diff --git a/src/slice/mod.rs b/src/slice/mod.rs
index d47b0d3..dab56de 100644
--- a/src/slice/mod.rs
+++ b/src/slice/mod.rs
@@ -5,8 +5,10 @@
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
+mod chunks;
mod mergesort;
mod quicksort;
+mod rchunks;
mod test;
@@ -18,8 +20,10 @@ use crate::split_producer::*;
use std::cmp;
use std::cmp::Ordering;
use std::fmt::{self, Debug};
+use std::mem;
-use super::math::div_round_up;
+pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
+pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
@@ -81,12 +85,10 @@ pub trait ParallelSlice<T: Sync> {
/// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
/// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
/// ```
+ #[track_caller]
fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
assert!(chunk_size != 0, "chunk_size must not be zero");
- Chunks {
- chunk_size,
- slice: self.as_parallel_slice(),
- }
+ Chunks::new(chunk_size, self.as_parallel_slice())
}
/// Returns a parallel iterator over `chunk_size` elements of
@@ -103,17 +105,50 @@ pub trait ParallelSlice<T: Sync> {
/// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
/// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
/// ```
+ #[track_caller]
fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
assert!(chunk_size != 0, "chunk_size must not be zero");
- let slice = self.as_parallel_slice();
- let rem = slice.len() % chunk_size;
- let len = slice.len() - rem;
- let (fst, snd) = slice.split_at(len);
- ChunksExact {
- chunk_size,
- slice: fst,
- rem: snd,
- }
+ ChunksExact::new(chunk_size, self.as_parallel_slice())
+ }
+
+ /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
+ /// starting at the end. The chunks do not overlap.
+ ///
+ /// If the number of elements in the iterator is not divisible by
+ /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
+ /// other chunks will have that exact length.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
+ /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
+ /// ```
+ #[track_caller]
+ fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ RChunks::new(chunk_size, self.as_parallel_slice())
+ }
+
+ /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
+ /// starting at the end. The chunks do not overlap.
+ ///
+ /// If `chunk_size` does not divide the length of the slice, then the
+ /// last up to `chunk_size-1` elements will be omitted and can be
+ /// retrieved from the remainder function of the iterator.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
+ /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
+ /// ```
+ #[track_caller]
+ fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ RChunksExact::new(chunk_size, self.as_parallel_slice())
}
}
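Assuming `RChunksExact` exposes a `remainder()` accessor like its forward counterpart shown earlier, a small sketch beyond the doc examples above (not part of the patch): for the reverse variants, the leftover elements sit at the front of the slice.

    use rayon::prelude::*;

    fn main() {
        let data = [1, 2, 3, 4, 5];
        let rex = data.par_rchunks_exact(2);
        assert_eq!(rex.remainder(), &[1]);
        let chunks: Vec<&[i32]> = rex.collect();
        assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    }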
@@ -168,12 +203,10 @@ pub trait ParallelSliceMut<T: Send> {
/// .for_each(|slice| slice.reverse());
/// assert_eq!(array, [2, 1, 4, 3, 5]);
/// ```
+ #[track_caller]
fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
assert!(chunk_size != 0, "chunk_size must not be zero");
- ChunksMut {
- chunk_size,
- slice: self.as_parallel_slice_mut(),
- }
+ ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
}
/// Returns a parallel iterator over `chunk_size` elements of
@@ -192,22 +225,59 @@ pub trait ParallelSliceMut<T: Send> {
/// .for_each(|slice| slice.reverse());
/// assert_eq!(array, [3, 2, 1, 4, 5]);
/// ```
+ #[track_caller]
fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
assert!(chunk_size != 0, "chunk_size must not be zero");
- let slice = self.as_parallel_slice_mut();
- let rem = slice.len() % chunk_size;
- let len = slice.len() - rem;
- let (fst, snd) = slice.split_at_mut(len);
- ChunksExactMut {
- chunk_size,
- slice: fst,
- rem: snd,
- }
+ ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
+ }
+
+ /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
+ /// starting at the end. The chunks are mutable and do not overlap.
+ ///
+ /// If the number of elements in the iterator is not divisible by
+ /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
+ /// other chunks will have that exact length.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let mut array = [1, 2, 3, 4, 5];
+ /// array.par_rchunks_mut(2)
+ /// .for_each(|slice| slice.reverse());
+ /// assert_eq!(array, [1, 3, 2, 5, 4]);
+ /// ```
+ #[track_caller]
+ fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
+ }
+
+ /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
+ /// starting at the end. The chunks are mutable and do not overlap.
+ ///
+ /// If `chunk_size` does not divide the length of the slice, then the
+ /// last up to `chunk_size-1` elements will be omitted and can be
+ /// retrieved from the remainder function of the iterator.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ /// let mut array = [1, 2, 3, 4, 5];
+ /// array.par_rchunks_exact_mut(3)
+ /// .for_each(|slice| slice.reverse());
+ /// assert_eq!(array, [1, 2, 5, 4, 3]);
+ /// ```
+ #[track_caller]
+ fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
+ assert!(chunk_size != 0, "chunk_size must not be zero");
+ RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
}
/// Sorts the slice in parallel.
///
- /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+ /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
///
/// When applicable, unstable sorting is preferred because it is generally faster than stable
/// sorting and it doesn't allocate auxiliary memory.
@@ -247,7 +317,25 @@ pub trait ParallelSliceMut<T: Send> {
/// Sorts the slice in parallel with a comparator function.
///
- /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+ /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
+ ///
+ /// The comparator function must define a total ordering for the elements in the slice. If
+ /// the ordering is not total, the order of the elements is unspecified. An order is a
+ /// total order if it is (for all `a`, `b` and `c`):
+ ///
+ /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
+ /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
+ ///
+ /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
+ /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
+ /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
+ /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
+ /// ```
///
/// When applicable, unstable sorting is preferred because it is generally faster than stable
/// sorting and it doesn't allocate auxiliary memory.
@@ -292,7 +380,12 @@ pub trait ParallelSliceMut<T: Send> {
/// Sorts the slice in parallel with a key extraction function.
///
- /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case.
+ /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
+ /// worst-case, where the key function is *O*(*m*).
+ ///
+ /// For expensive key functions (e.g. functions that are not simple property accesses or
+ /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
+ /// be significantly faster, as it does not recompute element keys.
///
/// When applicable, unstable sorting is preferred because it is generally faster than stable
/// sorting and it doesn't allocate auxiliary memory.
@@ -323,27 +416,119 @@ pub trait ParallelSliceMut<T: Send> {
/// v.par_sort_by_key(|k| k.abs());
/// assert_eq!(v, [1, 2, -3, 4, -5]);
/// ```
- fn par_sort_by_key<B, F>(&mut self, f: F)
+ fn par_sort_by_key<K, F>(&mut self, f: F)
where
- B: Ord,
- F: Fn(&T) -> B + Sync,
+ K: Ord,
+ F: Fn(&T) -> K + Sync,
{
par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
}
- /// Sorts the slice in parallel, but may not preserve the order of equal elements.
+ /// Sorts the slice in parallel with a key extraction function.
///
- /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
- /// and `O(n log n)` worst-case.
+ /// During sorting, the key function is called at most once per element, by using
+ /// temporary storage to remember the results of key evaluation.
+ /// The key function is called in parallel, so the order of calls is completely unspecified.
+ ///
+ /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
+ /// worst-case, where the key function is *O*(*m*).
+ ///
+ /// For simple key functions (e.g., functions that are property accesses or
+ /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
+ /// faster.
///
/// # Current implementation
///
- /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
- /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
- /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
- /// heapsort on degenerate inputs.
+ /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
+ /// which combines the fast average case of randomized quicksort with the fast worst case of
+ /// heapsort, while achieving linear time on slices with certain patterns. It uses some
+ /// randomization to avoid degenerate cases, but with a fixed seed to always provide
+ /// deterministic behavior.
+ ///
+ /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
+ /// length of the slice.
+ ///
+ /// All quicksorts work in two stages: partitioning into two halves followed by recursive
+ /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
+ /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
+ ///
+ /// [pdqsort]: https://github.com/orlp/pdqsort
+ ///
+ /// # Examples
///
- /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let mut v = [-5i32, 4, 32, -3, 2];
+ ///
+ /// v.par_sort_by_cached_key(|k| k.to_string());
+ /// assert!(v == [-3, -5, 2, 32, 4]);
+ /// ```
+ fn par_sort_by_cached_key<K, F>(&mut self, f: F)
+ where
+ F: Fn(&T) -> K + Sync,
+ K: Ord + Send,
+ {
+ let slice = self.as_parallel_slice_mut();
+ let len = slice.len();
+ if len < 2 {
+ return;
+ }
+
+ // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
+ macro_rules! sort_by_key {
+ ($t:ty) => {{
+ let mut indices: Vec<_> = slice
+ .par_iter_mut()
+ .enumerate()
+ .map(|(i, x)| (f(&*x), i as $t))
+ .collect();
+ // The elements of `indices` are unique, as they are indexed, so any sort will be
+ // stable with respect to the original slice. We use `sort_unstable` here because
+ // it requires less memory allocation.
+ indices.par_sort_unstable();
+ for i in 0..len {
+ let mut index = indices[i].1;
+ while (index as usize) < i {
+ index = indices[index as usize].1;
+ }
+ indices[i].1 = index;
+ slice.swap(i, index as usize);
+ }
+ }};
+ }
+
+ let sz_u8 = mem::size_of::<(K, u8)>();
+ let sz_u16 = mem::size_of::<(K, u16)>();
+ let sz_u32 = mem::size_of::<(K, u32)>();
+ let sz_usize = mem::size_of::<(K, usize)>();
+
+ if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
+ return sort_by_key!(u8);
+ }
+ if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
+ return sort_by_key!(u16);
+ }
+ if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
+ return sort_by_key!(u32);
+ }
+ sort_by_key!(usize)
+ }
+
+ /// Sorts the slice in parallel, but might not preserve the order of equal elements.
+ ///
+ /// This sort is unstable (i.e., may reorder equal elements), in-place
+ /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
+ ///
+ /// # Current implementation
+ ///
+ /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
+ /// which combines the fast average case of randomized quicksort with the fast worst case of
+ /// heapsort, while achieving linear time on slices with certain patterns. It uses some
+ /// randomization to avoid degenerate cases, but with a fixed seed to always provide
+ /// deterministic behavior.
+ ///
+ /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
/// slice consists of several concatenated sorted sequences.
///
/// All quicksorts work in two stages: partitioning into two halves followed by recursive
@@ -369,20 +554,39 @@ pub trait ParallelSliceMut<T: Send> {
par_quicksort(self.as_parallel_slice_mut(), T::lt);
}
- /// Sorts the slice in parallel with a comparator function, but may not preserve the order of
+ /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
/// equal elements.
///
- /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
- /// and `O(n log n)` worst-case.
+ /// This sort is unstable (i.e., may reorder equal elements), in-place
+ /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
+ ///
+ /// The comparator function must define a total ordering for the elements in the slice. If
+ /// the ordering is not total, the order of the elements is unspecified. An order is a
+ /// total order if it is (for all `a`, `b` and `c`):
+ ///
+ /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
+ /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
+ ///
+ /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
+ /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
+ ///
+ /// ```
+ /// use rayon::prelude::*;
+ ///
+ /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
+ /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
+ /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
+ /// ```
///
/// # Current implementation
///
- /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
- /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
- /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
- /// heapsort on degenerate inputs.
+ /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
+ /// which combines the fast average case of randomized quicksort with the fast worst case of
+ /// heapsort, while achieving linear time on slices with certain patterns. It uses some
+ /// randomization to avoid degenerate cases, but with a fixed seed to always provide
+ /// deterministic behavior.
///
- /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
+ /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
/// slice consists of several concatenated sorted sequences.
///
/// All quicksorts work in two stages: partitioning into two halves followed by recursive
@@ -413,21 +617,24 @@ pub trait ParallelSliceMut<T: Send> {
});
}
- /// Sorts the slice in parallel with a key extraction function, but may not preserve the order
+ /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
/// of equal elements.
///
- /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate),
- /// and `O(n log n)` worst-case.
+ /// This sort is unstable (i.e., may reorder equal elements), in-place
+ /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
+ /// where the key function is *O*(*m*).
///
/// # Current implementation
///
- /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort],
- /// which is a quicksort variant designed to be very fast on certain kinds of patterns,
- /// sometimes achieving linear time. It is randomized but deterministic, and falls back to
- /// heapsort on degenerate inputs.
+ /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
+ /// which combines the fast average case of randomized quicksort with the fast worst case of
+ /// heapsort, while achieving linear time on slices with certain patterns. It uses some
+ /// randomization to avoid degenerate cases, but with a fixed seed to always provide
+ /// deterministic behavior.
///
- /// It is generally faster than stable sorting, except in a few special cases, e.g. when the
- /// slice consists of several concatenated sorted sequences.
+ /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
+ /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
+ /// is expensive.
///
/// All quicksorts work in two stages: partitioning into two halves followed by recursive
/// calls. The partitioning phase is sequential, but the two recursive calls are performed in
@@ -445,10 +652,10 @@ pub trait ParallelSliceMut<T: Send> {
/// v.par_sort_unstable_by_key(|k| k.abs());
/// assert_eq!(v, [1, 2, -3, 4, -5]);
/// ```
- fn par_sort_unstable_by_key<B, F>(&mut self, f: F)
+ fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
where
- B: Ord,
- F: Fn(&T) -> B + Sync,
+ K: Ord,
+ F: Fn(&T) -> K + Sync,
{
par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
}
@@ -544,176 +751,6 @@ impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
}
}
-/// Parallel iterator over immutable non-overlapping chunks of a slice
-#[derive(Debug)]
-pub struct Chunks<'data, T: Sync> {
- chunk_size: usize,
- slice: &'data [T],
-}
-
-impl<'data, T: Sync> Clone for Chunks<'data, T> {
- fn clone(&self) -> Self {
- Chunks { ..*self }
- }
-}
-
-impl<'data, T: Sync + 'data> ParallelIterator for Chunks<'data, T> {
- type Item = &'data [T];
-
- fn drive_unindexed<C>(self, consumer: C) -> C::Result
- where
- C: UnindexedConsumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn opt_len(&self) -> Option<usize> {
- Some(self.len())
- }
-}
-
-impl<'data, T: Sync + 'data> IndexedParallelIterator for Chunks<'data, T> {
- fn drive<C>(self, consumer: C) -> C::Result
- where
- C: Consumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn len(&self) -> usize {
- div_round_up(self.slice.len(), self.chunk_size)
- }
-
- fn with_producer<CB>(self, callback: CB) -> CB::Output
- where
- CB: ProducerCallback<Self::Item>,
- {
- callback.callback(ChunksProducer {
- chunk_size: self.chunk_size,
- slice: self.slice,
- })
- }
-}
-
-struct ChunksProducer<'data, T: Sync> {
- chunk_size: usize,
- slice: &'data [T],
-}
-
-impl<'data, T: 'data + Sync> Producer for ChunksProducer<'data, T> {
- type Item = &'data [T];
- type IntoIter = ::std::slice::Chunks<'data, T>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.slice.chunks(self.chunk_size)
- }
-
- fn split_at(self, index: usize) -> (Self, Self) {
- let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
- let (left, right) = self.slice.split_at(elem_index);
- (
- ChunksProducer {
- chunk_size: self.chunk_size,
- slice: left,
- },
- ChunksProducer {
- chunk_size: self.chunk_size,
- slice: right,
- },
- )
- }
-}
-
-/// Parallel iterator over immutable non-overlapping chunks of a slice
-#[derive(Debug)]
-pub struct ChunksExact<'data, T: Sync> {
- chunk_size: usize,
- slice: &'data [T],
- rem: &'data [T],
-}
-
-impl<'data, T: Sync> ChunksExact<'data, T> {
- /// Return the remainder of the original slice that is not going to be
- /// returned by the iterator. The returned slice has at most `chunk_size-1`
- /// elements.
- pub fn remainder(&self) -> &'data [T] {
- self.rem
- }
-}
-
-impl<'data, T: Sync> Clone for ChunksExact<'data, T> {
- fn clone(&self) -> Self {
- ChunksExact { ..*self }
- }
-}
-
-impl<'data, T: Sync + 'data> ParallelIterator for ChunksExact<'data, T> {
- type Item = &'data [T];
-
- fn drive_unindexed<C>(self, consumer: C) -> C::Result
- where
- C: UnindexedConsumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn opt_len(&self) -> Option<usize> {
- Some(self.len())
- }
-}
-
-impl<'data, T: Sync + 'data> IndexedParallelIterator for ChunksExact<'data, T> {
- fn drive<C>(self, consumer: C) -> C::Result
- where
- C: Consumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn len(&self) -> usize {
- self.slice.len() / self.chunk_size
- }
-
- fn with_producer<CB>(self, callback: CB) -> CB::Output
- where
- CB: ProducerCallback<Self::Item>,
- {
- callback.callback(ChunksExactProducer {
- chunk_size: self.chunk_size,
- slice: self.slice,
- })
- }
-}
-
-struct ChunksExactProducer<'data, T: Sync> {
- chunk_size: usize,
- slice: &'data [T],
-}
-
-impl<'data, T: 'data + Sync> Producer for ChunksExactProducer<'data, T> {
- type Item = &'data [T];
- type IntoIter = ::std::slice::ChunksExact<'data, T>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.slice.chunks_exact(self.chunk_size)
- }
-
- fn split_at(self, index: usize) -> (Self, Self) {
- let elem_index = index * self.chunk_size;
- let (left, right) = self.slice.split_at(elem_index);
- (
- ChunksExactProducer {
- chunk_size: self.chunk_size,
- slice: left,
- },
- ChunksExactProducer {
- chunk_size: self.chunk_size,
- slice: right,
- },
- )
- }
-}
-
/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
@@ -858,187 +895,6 @@ impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
}
}
-/// Parallel iterator over mutable non-overlapping chunks of a slice
-#[derive(Debug)]
-pub struct ChunksMut<'data, T: Send> {
- chunk_size: usize,
- slice: &'data mut [T],
-}
-
-impl<'data, T: Send + 'data> ParallelIterator for ChunksMut<'data, T> {
- type Item = &'data mut [T];
-
- fn drive_unindexed<C>(self, consumer: C) -> C::Result
- where
- C: UnindexedConsumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn opt_len(&self) -> Option<usize> {
- Some(self.len())
- }
-}
-
-impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksMut<'data, T> {
- fn drive<C>(self, consumer: C) -> C::Result
- where
- C: Consumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn len(&self) -> usize {
- div_round_up(self.slice.len(), self.chunk_size)
- }
-
- fn with_producer<CB>(self, callback: CB) -> CB::Output
- where
- CB: ProducerCallback<Self::Item>,
- {
- callback.callback(ChunksMutProducer {
- chunk_size: self.chunk_size,
- slice: self.slice,
- })
- }
-}
-
-struct ChunksMutProducer<'data, T: Send> {
- chunk_size: usize,
- slice: &'data mut [T],
-}
-
-impl<'data, T: 'data + Send> Producer for ChunksMutProducer<'data, T> {
- type Item = &'data mut [T];
- type IntoIter = ::std::slice::ChunksMut<'data, T>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.slice.chunks_mut(self.chunk_size)
- }
-
- fn split_at(self, index: usize) -> (Self, Self) {
- let elem_index = cmp::min(index * self.chunk_size, self.slice.len());
- let (left, right) = self.slice.split_at_mut(elem_index);
- (
- ChunksMutProducer {
- chunk_size: self.chunk_size,
- slice: left,
- },
- ChunksMutProducer {
- chunk_size: self.chunk_size,
- slice: right,
- },
- )
- }
-}
-
-/// Parallel iterator over mutable non-overlapping chunks of a slice
-#[derive(Debug)]
-pub struct ChunksExactMut<'data, T: Send> {
- chunk_size: usize,
- slice: &'data mut [T],
- rem: &'data mut [T],
-}
-
-impl<'data, T: Send> ChunksExactMut<'data, T> {
- /// Return the remainder of the original slice that is not going to be
- /// returned by the iterator. The returned slice has at most `chunk_size-1`
- /// elements.
- ///
- /// Note that this has to consume `self` to return the original lifetime of
- /// the data, which prevents this from actually being used as a parallel
- /// iterator since that also consumes. This method is provided for parity
- /// with `std::iter::ChunksExactMut`, but consider calling `remainder()` or
- /// `take_remainder()` as alternatives.
- pub fn into_remainder(self) -> &'data mut [T] {
- self.rem
- }
-
- /// Return the remainder of the original slice that is not going to be
- /// returned by the iterator. The returned slice has at most `chunk_size-1`
- /// elements.
- ///
- /// Consider `take_remainder()` if you need access to the data with its
- /// original lifetime, rather than borrowing through `&mut self` here.
- pub fn remainder(&mut self) -> &mut [T] {
- self.rem
- }
-
- /// Return the remainder of the original slice that is not going to be
- /// returned by the iterator. The returned slice has at most `chunk_size-1`
- /// elements. Subsequent calls will return an empty slice.
- pub fn take_remainder(&mut self) -> &'data mut [T] {
- std::mem::replace(&mut self.rem, &mut [])
- }
-}
-
-impl<'data, T: Send + 'data> ParallelIterator for ChunksExactMut<'data, T> {
- type Item = &'data mut [T];
-
- fn drive_unindexed<C>(self, consumer: C) -> C::Result
- where
- C: UnindexedConsumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn opt_len(&self) -> Option<usize> {
- Some(self.len())
- }
-}
-
-impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksExactMut<'data, T> {
- fn drive<C>(self, consumer: C) -> C::Result
- where
- C: Consumer<Self::Item>,
- {
- bridge(self, consumer)
- }
-
- fn len(&self) -> usize {
- self.slice.len() / self.chunk_size
- }
-
- fn with_producer<CB>(self, callback: CB) -> CB::Output
- where
- CB: ProducerCallback<Self::Item>,
- {
- callback.callback(ChunksExactMutProducer {
- chunk_size: self.chunk_size,
- slice: self.slice,
- })
- }
-}
-
-struct ChunksExactMutProducer<'data, T: Send> {
- chunk_size: usize,
- slice: &'data mut [T],
-}
-
-impl<'data, T: 'data + Send> Producer for ChunksExactMutProducer<'data, T> {
- type Item = &'data mut [T];
- type IntoIter = ::std::slice::ChunksExactMut<'data, T>;
-
- fn into_iter(self) -> Self::IntoIter {
- self.slice.chunks_exact_mut(self.chunk_size)
- }
-
- fn split_at(self, index: usize) -> (Self, Self) {
- let elem_index = index * self.chunk_size;
- let (left, right) = self.slice.split_at_mut(elem_index);
- (
- ChunksExactMutProducer {
- chunk_size: self.chunk_size,
- slice: left,
- },
- ChunksExactMutProducer {
- chunk_size: self.chunk_size,
- slice: right,
- },
- )
- }
-}
-
/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
slice: &'data [T],
diff --git a/src/slice/quicksort.rs b/src/slice/quicksort.rs
index 17c6f33..d71079e 100644
--- a/src/slice/quicksort.rs
+++ b/src/slice/quicksort.rs
@@ -5,46 +5,20 @@
//! `rayon_core::join`.
use std::cmp;
-use std::mem;
+use std::mem::{self, MaybeUninit};
use std::ptr;
-/// When dropped, takes the value out of `Option` and writes it into `dest`.
-///
-/// This allows us to safely read the pivot into a stack-allocated variable for efficiency, and
-/// write it back into the slice after partitioning. This way we ensure that the write happens
-/// even if `is_less` panics in the meantime.
-struct WriteOnDrop<T> {
- value: Option<T>,
- dest: *mut T,
-}
-
-impl<T> Drop for WriteOnDrop<T> {
- fn drop(&mut self) {
- unsafe {
- ptr::write(self.dest, self.value.take().unwrap());
- }
- }
-}
-
-/// Holds a value, but never drops it.
-struct NoDrop<T> {
- value: Option<T>,
-}
-
-impl<T> Drop for NoDrop<T> {
- fn drop(&mut self) {
- mem::forget(self.value.take());
- }
-}
-
/// When dropped, copies from `src` into `dest`.
struct CopyOnDrop<T> {
- src: *mut T,
+ src: *const T,
dest: *mut T,
}
impl<T> Drop for CopyOnDrop<T> {
fn drop(&mut self) {
+ // SAFETY: This is a helper class.
+ // Please refer to its usage for correctness.
+ // Namely, one must be sure that `src` and `dst` do not overlap, as required by `ptr::copy_nonoverlapping`.
unsafe {
ptr::copy_nonoverlapping(self.src, self.dest, 1);
}
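For readers unfamiliar with this guard pattern, here is a self-contained sketch (an illustration, not the crate's exact code) of how `CopyOnDrop` pairs with `ManuallyDrop` so that a panicking comparator cannot leave the slice with a missing element:

```
use std::{mem::ManuallyDrop, ptr};

struct CopyOnDrop<T> {
    src: *const T,
    dest: *mut T,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        // SAFETY: callers guarantee `src` and `dest` are valid and disjoint.
        unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1) }
    }
}

/// Swaps v[0] and v[1]; the guard writes `tmp` back even if code in
/// between unwinds, so no element is ever lost or duplicated.
fn swap_first_two<T>(v: &mut [T]) {
    assert!(v.len() >= 2);
    unsafe {
        let tmp = ManuallyDrop::new(ptr::read(v.as_ptr())); // hole at v[0]
        let _guard = CopyOnDrop { src: &*tmp, dest: v.as_mut_ptr().add(1) };
        ptr::copy_nonoverlapping(v.as_ptr().add(1), v.as_mut_ptr(), 1); // v[1] -> v[0]
        // `_guard` drops here and copies `tmp` into v[1], completing the swap.
    }
}
```

The same shape appears in `shift_head`/`shift_tail` below, where the guard's `dest` is advanced step by step as the hole moves through the slice.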
@@ -57,29 +31,43 @@ where
F: Fn(&T, &T) -> bool,
{
let len = v.len();
+ // SAFETY: The unsafe operations below involve indexing without a bounds check (by offsetting a
+ // pointer) and copying memory (`ptr::copy_nonoverlapping`).
+ //
+ // a. Indexing:
+ // 1. We checked the size of the array to >=2.
+ // 2. All the indexing that we will do is always between {0 <= index < len} at most.
+ //
+ // b. Memory copying
+ // 1. We are obtaining pointers to references which are guaranteed to be valid.
+ // 2. They cannot overlap because we obtain pointers to different indices of the slice.
+ // Namely, `i` and `i-1`.
+ // 3. If the slice is properly aligned, the elements are properly aligned.
+ // It is the caller's responsibility to make sure the slice is properly aligned.
+ //
+ // See comments below for further detail.
unsafe {
// If the first two elements are out-of-order...
if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
// Read the first element into a stack-allocated variable. If a following comparison
// operation panics, `hole` will get dropped and automatically write the element back
// into the slice.
- let mut tmp = NoDrop {
- value: Some(ptr::read(v.get_unchecked(0))),
- };
+ let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(0)));
+ let v = v.as_mut_ptr();
let mut hole = CopyOnDrop {
- src: tmp.value.as_mut().unwrap(),
- dest: v.get_unchecked_mut(1),
+ src: &*tmp,
+ dest: v.add(1),
};
- ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
+ ptr::copy_nonoverlapping(v.add(1), v.add(0), 1);
for i in 2..len {
- if !is_less(v.get_unchecked(i), tmp.value.as_ref().unwrap()) {
+ if !is_less(&*v.add(i), &*tmp) {
break;
}
// Move `i`-th element one place to the left, thus shifting the hole to the right.
- ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
- hole.dest = v.get_unchecked_mut(i);
+ ptr::copy_nonoverlapping(v.add(i), v.add(i - 1), 1);
+ hole.dest = v.add(i);
}
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
}
@@ -92,29 +80,43 @@ where
F: Fn(&T, &T) -> bool,
{
let len = v.len();
+ // SAFETY: The unsafe operations below involve indexing without a bounds check (by offsetting a
+ // pointer) and copying memory (`ptr::copy_nonoverlapping`).
+ //
+ // a. Indexing:
+ // 1. We checked that the size of the array is >= 2.
+ // 2. All the indexing that we will do is always between `0 <= index < len-1` at most.
+ //
+ // b. Memory copying
+ // 1. We are obtaining pointers to references which are guaranteed to be valid.
+ // 2. They cannot overlap because we obtain pointers to different indices of the slice.
+ // Namely, `i` and `i+1`.
+ // 3. If the slice is properly aligned, the elements are properly aligned.
+ // It is the caller's responsibility to make sure the slice is properly aligned.
+ //
+ // See comments below for further detail.
unsafe {
// If the last two elements are out-of-order...
if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
// Read the last element into a stack-allocated variable. If a following comparison
// operation panics, `hole` will get dropped and automatically write the element back
// into the slice.
- let mut tmp = NoDrop {
- value: Some(ptr::read(v.get_unchecked(len - 1))),
- };
+ let tmp = mem::ManuallyDrop::new(ptr::read(v.get_unchecked(len - 1)));
+ let v = v.as_mut_ptr();
let mut hole = CopyOnDrop {
- src: tmp.value.as_mut().unwrap(),
- dest: v.get_unchecked_mut(len - 2),
+ src: &*tmp,
+ dest: v.add(len - 2),
};
- ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
+ ptr::copy_nonoverlapping(v.add(len - 2), v.add(len - 1), 1);
for i in (0..len - 2).rev() {
- if !is_less(&tmp.value.as_ref().unwrap(), v.get_unchecked(i)) {
+ if !is_less(&*tmp, &*v.add(i)) {
break;
}
// Move `i`-th element one place to the right, thus shifting the hole to the left.
- ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
- hole.dest = v.get_unchecked_mut(i);
+ ptr::copy_nonoverlapping(v.add(i), v.add(i + 1), 1);
+ hole.dest = v.add(i);
}
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
}
@@ -123,7 +125,7 @@ where
/// Partially sorts a slice by shifting several out-of-order elements around.
///
-/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
+/// Returns `true` if the slice is sorted at the end. This function is *O*(*n*) worst-case.
#[cold]
fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &F) -> bool
where
@@ -138,6 +140,8 @@ where
let mut i = 1;
for _ in 0..MAX_STEPS {
+ // SAFETY: We already explicitly did the bounds checking with `i < len`.
+ // All our subsequent indexing is only in the range `0 <= index < len`.
unsafe {
// Find the next pair of adjacent out-of-order elements.
while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
@@ -168,17 +172,17 @@ where
false
}
-/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
+/// Sorts a slice using insertion sort, which is *O*(*n*^2) worst-case.
fn insertion_sort<T, F>(v: &mut [T], is_less: &F)
where
F: Fn(&T, &T) -> bool,
{
for i in 1..v.len() {
- shift_tail(&mut v[..=i], is_less);
+ shift_tail(&mut v[..i + 1], is_less);
}
}
-/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
+/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
#[cold]
fn heapsort<T, F>(v: &mut [T], is_less: &F)
where
@@ -187,25 +191,25 @@ where
// This binary heap respects the invariant `parent >= child`.
let sift_down = |v: &mut [T], mut node| {
loop {
- // Children of `node`:
- let left = 2 * node + 1;
- let right = 2 * node + 2;
+ // Children of `node`.
+ let mut child = 2 * node + 1;
+ if child >= v.len() {
+ break;
+ }
// Choose the greater child.
- let greater = if right < v.len() && is_less(&v[left], &v[right]) {
- right
- } else {
- left
- };
+ if child + 1 < v.len() && is_less(&v[child], &v[child + 1]) {
+ child += 1;
+ }
// Stop if the invariant holds at `node`.
- if greater >= v.len() || !is_less(&v[node], &v[greater]) {
+ if !is_less(&v[node], &v[child]) {
break;
}
// Swap `node` with the greater child, move one step down, and continue sifting.
- v.swap(node, greater);
- node = greater;
+ v.swap(node, child);
+ node = child;
}
};
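The hunk above only shows the reworked `sift_down`; for context, here is a sketch of the full heapsort driver it plugs into, simplified to `T: Ord` instead of the crate's `is_less` closure:

```
fn heapsort_sketch<T: Ord>(v: &mut [T]) {
    // Same child indexing as above: node n has children 2n+1 and 2n+2.
    let sift_down = |v: &mut [T], mut node: usize| loop {
        let mut child = 2 * node + 1;
        if child >= v.len() {
            break;
        }
        if child + 1 < v.len() && v[child] < v[child + 1] {
            child += 1; // choose the greater child
        }
        if v[node] >= v[child] {
            break; // heap invariant holds
        }
        v.swap(node, child);
        node = child;
    };

    // Build a max-heap: sift every parent node, deepest first.
    for i in (0..v.len() / 2).rev() {
        sift_down(v, i);
    }
    // Pop the maximum to the end, shrinking the heap each time.
    for i in (1..v.len()).rev() {
        v.swap(0, i);
        sift_down(&mut v[..i], 0);
    }
}
```

Merging the two-child comparison into a single `child` variable, as the diff does, removes one bounds comparison per iteration without changing the invariant.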
@@ -250,23 +254,30 @@ where
// 3. `end` - End pointer into the `offsets` array.
// 4. `offsets` - Indices of out-of-order elements within the block.
- // The current block on the left side (from `l` to `l.offset(block_l)`).
+ // The current block on the left side (from `l` to `l.add(block_l)`).
let mut l = v.as_mut_ptr();
let mut block_l = BLOCK;
let mut start_l = ptr::null_mut();
let mut end_l = ptr::null_mut();
- let mut offsets_l = [0u8; BLOCK];
+ let mut offsets_l = [MaybeUninit::<u8>::uninit(); BLOCK];
- // The current block on the right side (from `r.offset(-block_r)` to `r`).
+ // The current block on the right side (from `r.sub(block_r)` to `r`).
+ // SAFETY: The documentation for `.add()` specifically mentions that `vec.as_ptr().add(vec.len())` is always safe.
let mut r = unsafe { l.add(v.len()) };
let mut block_r = BLOCK;
let mut start_r = ptr::null_mut();
let mut end_r = ptr::null_mut();
- let mut offsets_r = [0u8; BLOCK];
+ let mut offsets_r = [MaybeUninit::<u8>::uninit(); BLOCK];
+
+ // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
+ // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
// Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
fn width<T>(l: *mut T, r: *mut T) -> usize {
assert!(mem::size_of::<T>() > 0);
+ // FIXME: this should *likely* use `offset_from`, but more
+ // investigation is needed (including running tests in miri).
+ // TODO unstable: (r.addr() - l.addr()) / mem::size_of::<T>()
(r as usize - l as usize) / mem::size_of::<T>()
}
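As a quick illustration, the byte-difference trick in `width` computes the element distance between two pointers into the same slice:

```
let v = [0u32; 8];
let l = v.as_ptr();
let r = unsafe { l.add(5) }; // still within the same allocation
let width = (r as usize - l as usize) / std::mem::size_of::<u32>();
assert_eq!(width, 5);
```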
@@ -289,6 +300,9 @@ where
} else if start_r < end_r {
block_l = rem;
} else {
+ // There were the same number of elements to switch on both blocks during the last
+ // iteration, so there are no remaining elements on either block. Cover the remaining
+ // items with roughly equally-sized blocks.
block_l = rem / 2;
block_r = rem - block_l;
}
@@ -298,11 +312,22 @@ where
if start_l == end_l {
// Trace `block_l` elements from the left side.
- start_l = offsets_l.as_mut_ptr();
- end_l = offsets_l.as_mut_ptr();
+ // TODO unstable: start_l = MaybeUninit::slice_as_mut_ptr(&mut offsets_l);
+ start_l = offsets_l.as_mut_ptr() as *mut u8;
+ end_l = start_l;
let mut elem = l;
for i in 0..block_l {
+ // SAFETY: The unsafe operations below involve the use of `offset`.
+ // According to the conditions required by the function, we satisfy them because:
+ // 1. `offsets_l` is stack-allocated, and thus considered a separate allocated object.
+ // 2. The function `is_less` returns a `bool`.
+ // Casting a `bool` will never overflow `isize`.
+ // 3. We have guaranteed that `block_l` will be `<= BLOCK`.
+ // Plus, `end_l` was initially set to the beginning pointer of `offsets_l`, which was declared on the stack.
+ // Thus, we know that even in the worst case (all invocations of `is_less` return false) we will only be at most 1 byte past the end.
+ // Another unsafe operation here is dereferencing `elem`.
+ // However, `elem` was initially the beginning pointer of the slice, which is always valid.
unsafe {
// Branchless comparison.
*end_l = i as u8;
@@ -314,11 +339,23 @@ where
if start_r == end_r {
// Trace `block_r` elements from the right side.
- start_r = offsets_r.as_mut_ptr();
- end_r = offsets_r.as_mut_ptr();
+ // TODO unstable: start_r = MaybeUninit::slice_as_mut_ptr(&mut offsets_r);
+ start_r = offsets_r.as_mut_ptr() as *mut u8;
+ end_r = start_r;
let mut elem = r;
for i in 0..block_r {
+ // SAFETY: The unsafe operations below involve the use of `offset`.
+ // According to the conditions required by the function, we satisfy them because:
+ // 1. `offsets_r` is stack-allocated, and thus considered a separate allocated object.
+ // 2. The function `is_less` returns a `bool`.
+ // Casting a `bool` will never overflow `isize`.
+ // 3. We have guaranteed that `block_r` will be `<= BLOCK`.
+ // Plus, `end_r` was initially set to the beginning pointer of `offsets_r`, which was declared on the stack.
+ // Thus, we know that even in the worst case (all invocations of `is_less` return true) we will only be at most 1 byte past the end.
+ // Another unsafe operation here is dereferencing `elem`.
+ // However, `elem` was initially `1 * sizeof(T)` past the end, and we decrement it by `1 * sizeof(T)` before accessing it.
+ // Plus, `block_r` was asserted to be less than `BLOCK`, and `elem` will therefore at most be pointing to the beginning of the slice.
unsafe {
// Branchless comparison.
elem = elem.offset(-1);
@@ -346,6 +383,22 @@ where
// Instead of swapping one pair at the time, it is more efficient to perform a cyclic
// permutation. This is not strictly equivalent to swapping, but produces a similar
// result using fewer memory operations.
+
+ // SAFETY: The use of `ptr::read` is valid because there is at least one element in
+ // both `offsets_l` and `offsets_r`, so `left!` is a valid pointer to read from.
+ //
+ // The uses of `left!` involve calls to `offset` on `l`, which points to the
+ // beginning of `v`. All the offsets pointed-to by `start_l` are at most `block_l`, so
+ // these `offset` calls are safe as all reads are within the block. The same argument
+ // applies for the uses of `right!`.
+ //
+ // The calls to `start_l.offset` are valid because there are at most `count-1` of them,
+ // plus the final one at the end of the unsafe block, where `count` is the minimum number
+ // of collected offsets in `offsets_l` and `offsets_r`, so there is no risk of there not
+ // being enough elements. The same reasoning applies to the calls to `start_r.offset`.
+ //
+ // The calls to `copy_nonoverlapping` are safe because `left!` and `right!` are guaranteed
+ // not to overlap, and are valid because of the reasoning above.
unsafe {
let tmp = ptr::read(left!());
ptr::copy_nonoverlapping(right!(), left!(), 1);
@@ -366,12 +419,22 @@ where
if start_l == end_l {
// All out-of-order elements in the left block were moved. Move to the next block.
+
+ // block-width-guarantee
+ // SAFETY: if `!is_done` then the slice is guaranteed to be at least `2*BLOCK` wide. There
+ // are at most `BLOCK` elements in `offsets_l` because of its size, so the `offset` operation is
+ // safe. Otherwise, the debug assertions in the `is_done` case guarantee that
+ // `width(l, r) == block_l + block_r`, namely, that the block sizes have been adjusted to account
+ // for the smaller number of remaining elements.
l = unsafe { l.add(block_l) };
}
if start_r == end_r {
// All out-of-order elements in the right block were moved. Move to the previous block.
- r = unsafe { r.sub(block_r) };
+
+ // SAFETY: Same argument as [block-width-guarantee]. Either this is a full block `2*BLOCK`-wide,
+ // or `block_r` has been adjusted for the last handful of elements.
+ r = unsafe { r.offset(-(block_r as isize)) };
}
if is_done {
@@ -385,9 +448,20 @@ where
if start_l < end_l {
// The left block remains.
- // Move it's remaining out-of-order elements to the far right.
+ // Move its remaining out-of-order elements to the far right.
debug_assert_eq!(width(l, r), block_l);
while start_l < end_l {
+ // remaining-elements-safety
+ // SAFETY: while the loop condition holds there are still elements in `offsets_l`, so it
+ // is safe to point `end_l` to the previous element.
+ //
+ // The `ptr::swap` is safe if both its arguments are valid for reads and writes:
+ // - Per the debug assert above, the distance between `l` and `r` is `block_l`
+ // elements, so there can be at most `block_l` remaining offsets between `start_l`
+ // and `end_l`. This means `r` will be moved at most `block_l` steps back, which
+ // makes the `r.offset` calls valid (at that point `l == r`).
+ // - `offsets_l` contains valid offsets into `v` collected during the partitioning of
+ // the last block, so the `l.offset` calls are valid.
unsafe {
end_l = end_l.offset(-1);
ptr::swap(l.offset(*end_l as isize), r.offset(-1));
@@ -397,9 +471,10 @@ where
width(v.as_mut_ptr(), r)
} else if start_r < end_r {
// The right block remains.
- // Move it's remaining out-of-order elements to the far left.
+ // Move its remaining out-of-order elements to the far left.
debug_assert_eq!(width(l, r), block_r);
while start_r < end_r {
+ // SAFETY: See the reasoning in [remaining-elements-safety].
unsafe {
end_r = end_r.offset(-1);
ptr::swap(l, r.offset(-(*end_r as isize) - 1));
@@ -432,17 +507,25 @@ where
// Read the pivot into a stack-allocated variable for efficiency. If a following comparison
// operation panics, the pivot will be automatically written back into the slice.
- let write_on_drop = WriteOnDrop {
- value: unsafe { Some(ptr::read(pivot)) },
+
+ // SAFETY: `pivot` is a reference to the first element of `v`, so `ptr::read` is safe.
+ let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
+ let _pivot_guard = CopyOnDrop {
+ src: &*tmp,
dest: pivot,
};
- let pivot = write_on_drop.value.as_ref().unwrap();
+ let pivot = &*tmp;
// Find the first pair of out-of-order elements.
let mut l = 0;
let mut r = v.len();
+
+ // SAFETY: The unsafe operations below involve indexing into an array.
+ // For the first one: we already do the bounds checking here with `l < r`.
+ // For the second one: we initially have `l == 0` and `r == v.len()`, and we checked that `l < r` at every indexing operation.
+ // From here we know that `r` must be at least `l`, which was shown to be valid in the first case.
unsafe {
- // Find the first element greater then or equal to the pivot.
+ // Find the first element greater than or equal to the pivot.
while l < r && is_less(v.get_unchecked(l), pivot) {
l += 1;
}
@@ -458,7 +541,7 @@ where
l >= r,
)
- // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated
+ // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
// variable) back into the slice where it originally was. This step is critical in ensuring
// safety!
};
@@ -484,18 +567,24 @@ where
// Read the pivot into a stack-allocated variable for efficiency. If a following comparison
// operation panics, the pivot will be automatically written back into the slice.
- let write_on_drop = WriteOnDrop {
- value: unsafe { Some(ptr::read(pivot)) },
+ // SAFETY: The pointer here is valid because it is obtained from a reference to a slice.
+ let tmp = mem::ManuallyDrop::new(unsafe { ptr::read(pivot) });
+ let _pivot_guard = CopyOnDrop {
+ src: &*tmp,
dest: pivot,
};
- let pivot = write_on_drop.value.as_ref().unwrap();
+ let pivot = &*tmp;
// Now partition the slice.
let mut l = 0;
let mut r = v.len();
loop {
+ // SAFETY: The unsafe operations below involve indexing into an array.
+ // For the first one: we already do the bounds checking here with `l < r`.
+ // For the second one: we initially have `l == 0` and `r == v.len()`, and we checked that `l < r` at every indexing operation.
+ // From here we know that `r` must be at least `l`, which was shown to be valid in the first case.
unsafe {
- // Find the first element greater that the pivot.
+ // Find the first element greater than the pivot.
while l < r && !is_less(pivot, v.get_unchecked(l)) {
l += 1;
}
@@ -512,7 +601,8 @@ where
// Swap the found pair of out-of-order elements.
r -= 1;
- ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
+ let ptr = v.as_mut_ptr();
+ ptr::swap(ptr.add(l), ptr.add(r));
l += 1;
}
}
@@ -520,7 +610,7 @@ where
// We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
l + 1
- // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated variable)
+ // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
// back into the slice where it originally was. This step is critical in ensuring safety!
}
@@ -539,10 +629,10 @@ fn break_patterns<T>(v: &mut [T]) {
random
};
let mut gen_usize = || {
- if mem::size_of::<usize>() <= 4 {
+ if usize::BITS <= 32 {
gen_u32() as usize
} else {
- ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize
+ (((gen_u32() as u64) << 32) | (gen_u32() as u64)) as usize
}
};
@@ -585,6 +675,7 @@ where
let len = v.len();
// Three indices near which we are going to choose a pivot.
+ #[allow(clippy::identity_op)]
let mut a = len / 4 * 1;
let mut b = len / 4 * 2;
let mut c = len / 4 * 3;
@@ -594,6 +685,12 @@ where
if len >= 8 {
// Swaps indices so that `v[a] <= v[b]`.
+ // SAFETY: `len >= 8` so there are at least two elements in the neighborhoods of
+ // `a`, `b` and `c`. This means the three calls to `sort_adjacent` result in
+ // corresponding calls to `sort3` with valid 3-item neighborhoods around each
+ // pointer, which in turn means the calls to `sort2` are done with valid
+ // references. Thus the `v.get_unchecked` calls are safe, as is the `ptr::swap`
+ // call.
let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
ptr::swap(a, b);
@@ -641,7 +738,7 @@ where
///
/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
/// this function will immediately switch to heapsort.
-fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: usize)
+fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: u32)
where
T: Send,
F: Fn(&T, &T) -> bool + Sync,
@@ -667,7 +764,7 @@ where
}
// If too many bad pivot choices were made, simply fall back to heapsort in order to
- // guarantee `O(n log n)` worst-case.
+ // guarantee `O(n * log(n))` worst-case.
if limit == 0 {
heapsort(v, is_less);
return;
@@ -701,7 +798,7 @@ where
let mid = partition_equal(v, pivot, is_less);
// Continue sorting elements greater than the pivot.
- v = &mut { v }[mid..];
+ v = &mut v[mid..];
continue;
}
}
@@ -712,7 +809,7 @@ where
was_partitioned = was_p;
// Split the slice into `left`, `pivot`, and `right`.
- let (left, right) = { v }.split_at_mut(mid);
+ let (left, right) = v.split_at_mut(mid);
let (pivot, right) = right.split_at_mut(1);
let pivot = &mut pivot[0];
@@ -741,7 +838,7 @@ where
/// Sorts `v` using pattern-defeating quicksort in parallel.
///
-/// The algorithm is unstable, in-place, and `O(n log n)` worst-case.
+/// The algorithm is unstable, in-place, and *O*(*n* \* log(*n*)) worst-case.
pub(super) fn par_quicksort<T, F>(v: &mut [T], is_less: F)
where
T: Send,
@@ -753,7 +850,7 @@ where
}
// Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
- let limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
+ let limit = usize::BITS - v.len().leading_zeros();
recurse(v, &is_less, None, limit);
}
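A small sanity check (assuming a 64-bit `usize`) that the new `limit` expression still equals `floor(log2(len)) + 1`:

```
let len: usize = 1000; // 0b1111101000: 10 significant bits
let limit = usize::BITS - len.leading_zeros();
assert_eq!(limit, 10); // floor(log2(1000)) = 9, plus 1
```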
@@ -766,7 +863,7 @@ mod tests {
#[test]
fn test_heapsort() {
- let ref mut rng = thread_rng();
+ let rng = &mut thread_rng();
for len in (0..25).chain(500..501) {
for &modulus in &[5, 10, 100] {
@@ -793,8 +890,8 @@ mod tests {
heapsort(&mut v, &|_, _| thread_rng().gen());
heapsort(&mut v, &|a, b| a < b);
- for i in 0..v.len() {
- assert_eq!(v[i], i);
+ for (i, &entry) in v.iter().enumerate() {
+ assert_eq!(entry, i);
}
}
}
diff --git a/src/slice/rchunks.rs b/src/slice/rchunks.rs
new file mode 100644
index 0000000..63e765d
--- /dev/null
+++ b/src/slice/rchunks.rs
@@ -0,0 +1,386 @@
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use crate::math::div_round_up;
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice, starting at the end.
+#[derive(Debug)]
+pub struct RChunks<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: Sync> RChunks<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data [T]) -> Self {
+ Self { chunk_size, slice }
+ }
+}
+
+impl<'data, T: Sync> Clone for RChunks<'data, T> {
+ fn clone(&self) -> Self {
+ RChunks { ..*self }
+ }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for RChunks<'data, T> {
+ type Item = &'data [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for RChunks<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ div_round_up(self.slice.len(), self.chunk_size)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(RChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct RChunksProducer<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for RChunksProducer<'data, T> {
+ type Item = &'data [T];
+ type IntoIter = ::std::slice::RChunks<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.rchunks(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = self.slice.len().saturating_sub(index * self.chunk_size);
+ let (left, right) = self.slice.split_at(elem_index);
+ (
+ RChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ RChunksProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ )
+ }
+}
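A usage sketch of the new iterator. Note how `split_at` above counts chunks from the end of the slice (hence the `saturating_sub` for the final partial chunk), which is why the leftover short chunk comes last:

```
use rayon::prelude::*;

let v = [1, 2, 3, 4, 5];
let chunks: Vec<&[i32]> = v.par_rchunks(2).collect();
// Chunks are taken from the back; the short remainder chunk comes last.
assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
```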
+
+/// Parallel iterator over immutable non-overlapping chunks of a slice, starting at the end.
+#[derive(Debug)]
+pub struct RChunksExact<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+ rem: &'data [T],
+}
+
+impl<'data, T: Sync> RChunksExact<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data [T]) -> Self {
+ let rem_len = slice.len() % chunk_size;
+ let (rem, slice) = slice.split_at(rem_len);
+ Self {
+ chunk_size,
+ slice,
+ rem,
+ }
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ pub fn remainder(&self) -> &'data [T] {
+ self.rem
+ }
+}
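Since `new` splits `len % chunk_size` elements off the *front* before chunking, the exact variant behaves like this in practice:

```
use rayon::prelude::*;

let v = [1, 2, 3, 4, 5];
let rchunks = v.par_rchunks_exact(2);
// The leading elements that don't fill a chunk form the remainder.
assert_eq!(rchunks.remainder(), &[1]);
let chunks: Vec<&[i32]> = rchunks.collect();
assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
```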
+
+impl<'data, T: Sync> Clone for RChunksExact<'data, T> {
+ fn clone(&self) -> Self {
+ RChunksExact { ..*self }
+ }
+}
+
+impl<'data, T: Sync + 'data> ParallelIterator for RChunksExact<'data, T> {
+ type Item = &'data [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Sync + 'data> IndexedParallelIterator for RChunksExact<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ self.slice.len() / self.chunk_size
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(RChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct RChunksExactProducer<'data, T: Sync> {
+ chunk_size: usize,
+ slice: &'data [T],
+}
+
+impl<'data, T: 'data + Sync> Producer for RChunksExactProducer<'data, T> {
+ type Item = &'data [T];
+ type IntoIter = ::std::slice::RChunksExact<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.rchunks_exact(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = self.slice.len() - index * self.chunk_size;
+ let (left, right) = self.slice.split_at(elem_index);
+ (
+ RChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ RChunksExactProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ )
+ }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice, starting at the end.
+#[derive(Debug)]
+pub struct RChunksMut<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: Send> RChunksMut<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data mut [T]) -> Self {
+ Self { chunk_size, slice }
+ }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for RChunksMut<'data, T> {
+ type Item = &'data mut [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for RChunksMut<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ div_round_up(self.slice.len(), self.chunk_size)
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(RChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct RChunksMutProducer<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for RChunksMutProducer<'data, T> {
+ type Item = &'data mut [T];
+ type IntoIter = ::std::slice::RChunksMut<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.rchunks_mut(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = self.slice.len().saturating_sub(index * self.chunk_size);
+ let (left, right) = self.slice.split_at_mut(elem_index);
+ (
+ RChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ RChunksMutProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ )
+ }
+}
+
+/// Parallel iterator over mutable non-overlapping chunks of a slice, starting at the end.
+#[derive(Debug)]
+pub struct RChunksExactMut<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+ rem: &'data mut [T],
+}
+
+impl<'data, T: Send> RChunksExactMut<'data, T> {
+ pub(super) fn new(chunk_size: usize, slice: &'data mut [T]) -> Self {
+ let rem_len = slice.len() % chunk_size;
+ let (rem, slice) = slice.split_at_mut(rem_len);
+ Self {
+ chunk_size,
+ slice,
+ rem,
+ }
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ ///
+ /// Note that this has to consume `self` to return the original lifetime of
+ /// the data, which prevents this from actually being used as a parallel
+ /// iterator since that also consumes. This method is provided for parity
+ /// with `std::slice::RChunksExactMut`, but consider calling `remainder()` or
+ /// `take_remainder()` as alternatives.
+ pub fn into_remainder(self) -> &'data mut [T] {
+ self.rem
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements.
+ ///
+ /// Consider `take_remainder()` if you need access to the data with its
+ /// original lifetime, rather than borrowing through `&mut self` here.
+ pub fn remainder(&mut self) -> &mut [T] {
+ self.rem
+ }
+
+ /// Return the remainder of the original slice that is not going to be
+ /// returned by the iterator. The returned slice has at most `chunk_size-1`
+ /// elements. Subsequent calls will return an empty slice.
+ pub fn take_remainder(&mut self) -> &'data mut [T] {
+ std::mem::take(&mut self.rem)
+ }
+}
+
+impl<'data, T: Send + 'data> ParallelIterator for RChunksExactMut<'data, T> {
+ type Item = &'data mut [T];
+
+ fn drive_unindexed<C>(self, consumer: C) -> C::Result
+ where
+ C: UnindexedConsumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn opt_len(&self) -> Option<usize> {
+ Some(self.len())
+ }
+}
+
+impl<'data, T: Send + 'data> IndexedParallelIterator for RChunksExactMut<'data, T> {
+ fn drive<C>(self, consumer: C) -> C::Result
+ where
+ C: Consumer<Self::Item>,
+ {
+ bridge(self, consumer)
+ }
+
+ fn len(&self) -> usize {
+ self.slice.len() / self.chunk_size
+ }
+
+ fn with_producer<CB>(self, callback: CB) -> CB::Output
+ where
+ CB: ProducerCallback<Self::Item>,
+ {
+ callback.callback(RChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: self.slice,
+ })
+ }
+}
+
+struct RChunksExactMutProducer<'data, T: Send> {
+ chunk_size: usize,
+ slice: &'data mut [T],
+}
+
+impl<'data, T: 'data + Send> Producer for RChunksExactMutProducer<'data, T> {
+ type Item = &'data mut [T];
+ type IntoIter = ::std::slice::RChunksExactMut<'data, T>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.slice.rchunks_exact_mut(self.chunk_size)
+ }
+
+ fn split_at(self, index: usize) -> (Self, Self) {
+ let elem_index = self.slice.len() - index * self.chunk_size;
+ let (left, right) = self.slice.split_at_mut(elem_index);
+ (
+ RChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: right,
+ },
+ RChunksExactMutProducer {
+ chunk_size: self.chunk_size,
+ slice: left,
+ },
+ )
+ }
+}
diff --git a/src/slice/test.rs b/src/slice/test.rs
index 71743e2..f74ca0f 100644
--- a/src/slice/test.rs
+++ b/src/slice/test.rs
@@ -10,7 +10,7 @@ macro_rules! sort {
($f:ident, $name:ident) => {
#[test]
fn $name() {
- let ref mut rng = thread_rng();
+ let rng = &mut thread_rng();
for len in (0..25).chain(500..501) {
for &modulus in &[5, 10, 100] {
@@ -146,3 +146,25 @@ fn test_par_chunks_exact_mut_remainder() {
assert_eq!(c.take_remainder(), &[]);
assert_eq!(c.len(), 2);
}
+
+#[test]
+fn test_par_rchunks_exact_remainder() {
+ let v: &[i32] = &[0, 1, 2, 3, 4];
+ let c = v.par_rchunks_exact(2);
+ assert_eq!(c.remainder(), &[0]);
+ assert_eq!(c.len(), 2);
+}
+
+#[test]
+fn test_par_rchunks_exact_mut_remainder() {
+ let v: &mut [i32] = &mut [0, 1, 2, 3, 4];
+ let mut c = v.par_rchunks_exact_mut(2);
+ assert_eq!(c.remainder(), &[0]);
+ assert_eq!(c.len(), 2);
+ assert_eq!(c.into_remainder(), &[0]);
+
+ let mut c = v.par_rchunks_exact_mut(2);
+ assert_eq!(c.take_remainder(), &[0]);
+ assert_eq!(c.take_remainder(), &[]);
+ assert_eq!(c.len(), 2);
+}
diff --git a/src/str.rs b/src/str.rs
index 2fdaaa7..c85754e 100644
--- a/src/str.rs
+++ b/src/str.rs
@@ -6,7 +6,8 @@
//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
//! reference a `Pattern` trait which is not visible outside this crate.
//! This trait is intentionally kept private, for use only by Rayon itself.
-//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+//! It is implemented for `char`, `&[char]`, and any function or closure
+//! `F: Fn(char) -> bool + Sync + Send`.
//!
//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
@@ -34,11 +35,11 @@ fn find_char_midpoint(chars: &str) -> usize {
// character boundary. So we look at the raw bytes, first scanning
// forward from the midpoint for a boundary, then trying backward.
let (left, right) = chars.as_bytes().split_at(mid);
- match right.iter().cloned().position(is_char_boundary) {
+ match right.iter().copied().position(is_char_boundary) {
Some(i) => mid + i,
None => left
.iter()
- .cloned()
+ .copied()
.rposition(is_char_boundary)
.unwrap_or(0),
}
@@ -96,7 +97,7 @@ pub trait ParallelString {
/// Note that multi-byte sequences (for code points greater than `U+007F`)
/// are produced as separate items, but will not be split across threads.
/// If you would prefer an indexed iterator without that guarantee, consider
- /// `string.as_bytes().par_iter().cloned()` instead.
+ /// `string.as_bytes().par_iter().copied()` instead.
///
/// # Examples
///
@@ -139,7 +140,8 @@ pub trait ParallelString {
/// given character or predicate, similar to `str::split`.
///
/// Note: the `Pattern` trait is private, for use only by Rayon itself.
- /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+ /// It is implemented for `char`, `&[char]`, and any function or closure
+ /// `F: Fn(char) -> bool + Sync + Send`.
///
/// # Examples
///
@@ -161,7 +163,8 @@ pub trait ParallelString {
/// substring after a trailing terminator.
///
/// Note: the `Pattern` trait is private, for use only by Rayon itself.
- /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+ /// It is implemented for `char`, `&[char]`, and any function or closure
+ /// `F: Fn(char) -> bool + Sync + Send`.
///
/// # Examples
///
@@ -218,7 +221,8 @@ pub trait ParallelString {
/// given character or predicate, similar to `str::matches`.
///
/// Note: the `Pattern` trait is private, for use only by Rayon itself.
- /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+ /// It is implemented for `char`, `&[char]`, and any function or closure
+ /// `F: Fn(char) -> bool + Sync + Send`.
///
/// # Examples
///
@@ -241,7 +245,8 @@ pub trait ParallelString {
/// or predicate, with their positions, similar to `str::match_indices`.
///
/// Note: the `Pattern` trait is private, for use only by Rayon itself.
- /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
+ /// It is implemented for `char`, `&[char]`, and any function or closure
+ /// `F: Fn(char) -> bool + Sync + Send`.
///
/// # Examples
///
@@ -303,89 +308,62 @@ fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
move |(i, x)| (base + i, x)
}
-impl Pattern for char {
- private_impl! {}
+macro_rules! impl_pattern {
+ (&$self:ident => $pattern:expr) => {
+ private_impl! {}
- #[inline]
- fn find_in(&self, chars: &str) -> Option<usize> {
- chars.find(*self)
- }
+ #[inline]
+ fn find_in(&$self, chars: &str) -> Option<usize> {
+ chars.find($pattern)
+ }
- #[inline]
- fn rfind_in(&self, chars: &str) -> Option<usize> {
- chars.rfind(*self)
- }
+ #[inline]
+ fn rfind_in(&$self, chars: &str) -> Option<usize> {
+ chars.rfind($pattern)
+ }
- #[inline]
- fn is_suffix_of(&self, chars: &str) -> bool {
- chars.ends_with(*self)
- }
+ #[inline]
+ fn is_suffix_of(&$self, chars: &str) -> bool {
+ chars.ends_with($pattern)
+ }
- fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
- where
- F: Folder<&'ch str>,
- {
- let mut split = chars.split(*self);
- if skip_last {
- split.next_back();
+ fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F
+ where
+ F: Folder<&'ch str>,
+ {
+ let mut split = chars.split($pattern);
+ if skip_last {
+ split.next_back();
+ }
+ folder.consume_iter(split)
}
- folder.consume_iter(split)
- }
- fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
- where
- F: Folder<&'ch str>,
- {
- folder.consume_iter(chars.matches(*self))
- }
+ fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
+ where
+ F: Folder<&'ch str>,
+ {
+ folder.consume_iter(chars.matches($pattern))
+ }
- fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
- where
- F: Folder<(usize, &'ch str)>,
- {
- folder.consume_iter(chars.match_indices(*self).map(offset(base)))
+ fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F
+ where
+ F: Folder<(usize, &'ch str)>,
+ {
+ folder.consume_iter(chars.match_indices($pattern).map(offset(base)))
+ }
}
}
-impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
- private_impl! {}
-
- fn find_in(&self, chars: &str) -> Option<usize> {
- chars.find(self)
- }
-
- fn rfind_in(&self, chars: &str) -> Option<usize> {
- chars.rfind(self)
- }
-
- fn is_suffix_of(&self, chars: &str) -> bool {
- chars.ends_with(self)
- }
-
- fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
- where
- F: Folder<&'ch str>,
- {
- let mut split = chars.split(self);
- if skip_last {
- split.next_back();
- }
- folder.consume_iter(split)
- }
+impl Pattern for char {
+ impl_pattern!(&self => *self);
+}
- fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
- where
- F: Folder<&'ch str>,
- {
- folder.consume_iter(chars.matches(self))
- }
+impl Pattern for &[char] {
+ impl_pattern!(&self => *self);
+}
- fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
- where
- F: Folder<(usize, &'ch str)>,
- {
- folder.consume_iter(chars.match_indices(self).map(offset(base)))
- }
+impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
+ impl_pattern!(&self => self);
}
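With `Pattern` now implemented for `&[char]`, any character in the slice acts as a separator. A small usage sketch:

```
use rayon::prelude::*;

let parts: Vec<&str> = "a,b;c".par_split(&[',', ';'][..]).collect();
assert_eq!(parts, ["a", "b", "c"]);
```

Hoisting the method bodies into `impl_pattern!` keeps the three impls (`char`, `&[char]`, and closures) from drifting apart, since each body differs only in how `self` is passed to the underlying `str` method.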
// /////////////////////////////////////////////////////////////////////////
@@ -711,11 +689,7 @@ pub struct Lines<'ch>(&'ch str);
#[inline]
fn no_carriage_return(line: &str) -> &str {
- if line.ends_with('\r') {
- &line[..line.len() - 1]
- } else {
- line
- }
+ line.strip_suffix('\r').unwrap_or(line)
}
impl<'ch> ParallelIterator for Lines<'ch> {
diff --git a/src/vec.rs b/src/vec.rs
index 1e68aa0..7892f53 100644
--- a/src/vec.rs
+++ b/src/vec.rs
@@ -138,16 +138,10 @@ impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> {
{
unsafe {
// Make the vector forget about the drained items, and temporarily the tail too.
- let start = self.range.start;
- self.vec.set_len(start);
+ self.vec.set_len(self.range.start);
// Create the producer as the exclusive "owner" of the slice.
- let producer = {
- // Get a correct borrow lifetime, then extend it to the original length.
- let mut slice = &mut self.vec[start..];
- slice = slice::from_raw_parts_mut(slice.as_mut_ptr(), self.range.len());
- DrainProducer::new(slice)
- };
+ let producer = DrainProducer::from_vec(self.vec, self.range.len());
// The producer will move or drop each item from the drained range.
callback.callback(producer)
@@ -157,22 +151,24 @@ impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> {
impl<'data, T: Send> Drop for Drain<'data, T> {
fn drop(&mut self) {
- if self.range.len() > 0 {
- let Range { start, end } = self.range;
- if self.vec.len() != start {
- // We must not have produced, so just call a normal drain to remove the items.
- assert_eq!(self.vec.len(), self.orig_len);
- self.vec.drain(start..end);
- } else if end < self.orig_len {
- // The producer was responsible for consuming the drained items.
- // Move the tail items to their new place, then set the length to include them.
- unsafe {
- let ptr = self.vec.as_mut_ptr().add(start);
- let tail_ptr = self.vec.as_ptr().add(end);
- let tail_len = self.orig_len - end;
- ptr::copy(tail_ptr, ptr, tail_len);
- self.vec.set_len(start + tail_len);
- }
+ let Range { start, end } = self.range;
+ if self.vec.len() == self.orig_len {
+ // We must not have produced, so just call a normal drain to remove the items.
+ self.vec.drain(start..end);
+ } else if start == end {
+ // Empty range, so just restore the length to its original state
+ unsafe {
+ self.vec.set_len(self.orig_len);
+ }
+ } else if end < self.orig_len {
+ // The producer was responsible for consuming the drained items.
+ // Move the tail items to their new place, then set the length to include them.
+ unsafe {
+ let ptr = self.vec.as_mut_ptr().add(start);
+ let tail_ptr = self.vec.as_ptr().add(end);
+ let tail_len = self.orig_len - end;
+ ptr::copy(tail_ptr, ptr, tail_len);
+ self.vec.set_len(start + tail_len);
}
}
}
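The reworked `drop` distinguishes three cases: the producer never ran (fall back to a plain `drain`), the range was empty (just restore the length), and the producer consumed the range (shift the tail down and fix the length). For reference, a typical `par_drain` call that exercises the tail-move path:

```
use rayon::prelude::*;

let mut v: Vec<i32> = (0..10).collect();
let drained: Vec<i32> = v.par_drain(3..8).collect();
assert_eq!(drained, [3, 4, 5, 6, 7]);
// The tail [8, 9] was moved down to follow the untouched prefix.
assert_eq!(v, [0, 1, 2, 8, 9]);
```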
@@ -184,13 +180,27 @@ pub(crate) struct DrainProducer<'data, T: Send> {
slice: &'data mut [T],
}
-impl<'data, T: 'data + Send> DrainProducer<'data, T> {
+impl<T: Send> DrainProducer<'_, T> {
/// Creates a draining producer, which *moves* items from the slice.
///
- /// Unsafe bacause `!Copy` data must not be read after the borrow is released.
- pub(crate) unsafe fn new(slice: &'data mut [T]) -> Self {
+ /// Unsafe because `!Copy` data must not be read after the borrow is released.
+ pub(crate) unsafe fn new(slice: &mut [T]) -> DrainProducer<'_, T> {
DrainProducer { slice }
}
+
+ /// Creates a draining producer, which *moves* items from the tail of the vector.
+ ///
+ /// Unsafe because we're moving from beyond `vec.len()`, so the caller must ensure
+ /// that data is initialized and not read after the borrow is released.
+ unsafe fn from_vec(vec: &mut Vec<T>, len: usize) -> DrainProducer<'_, T> {
+ let start = vec.len();
+ assert!(vec.capacity() - start >= len);
+
+ // The pointer is derived from `Vec` directly, not through a `Deref`,
+ // so it has provenance over the whole allocation.
+ let ptr = vec.as_mut_ptr().add(start);
+ DrainProducer::new(slice::from_raw_parts_mut(ptr, len))
+ }
}
impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
@@ -199,7 +209,7 @@ impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
fn into_iter(mut self) -> Self::IntoIter {
// replace the slice so we don't drop it twice
- let slice = mem::replace(&mut self.slice, &mut []);
+ let slice = mem::take(&mut self.slice);
SliceDrain {
iter: slice.iter_mut(),
}
@@ -207,7 +217,7 @@ impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
fn split_at(mut self, index: usize) -> (Self, Self) {
// replace the slice so we don't drop it twice
- let slice = mem::replace(&mut self.slice, &mut []);
+ let slice = mem::take(&mut self.slice);
let (left, right) = slice.split_at_mut(index);
unsafe { (DrainProducer::new(left), DrainProducer::new(right)) }
}
@@ -215,8 +225,9 @@ impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> {
impl<'data, T: 'data + Send> Drop for DrainProducer<'data, T> {
fn drop(&mut self) {
- // use `Drop for [T]`
- unsafe { ptr::drop_in_place(self.slice) };
+ // extract the slice so we can use `Drop for [T]`
+ let slice_ptr: *mut [T] = mem::take::<&'data mut [T]>(&mut self.slice);
+ unsafe { ptr::drop_in_place::<[T]>(slice_ptr) };
}
}
@@ -266,7 +277,7 @@ impl<'data, T: 'data> iter::FusedIterator for SliceDrain<'data, T> {}
impl<'data, T: 'data> Drop for SliceDrain<'data, T> {
fn drop(&mut self) {
// extract the iterator so we can use `Drop for [T]`
- let iter = mem::replace(&mut self.iter, [].iter_mut());
- unsafe { ptr::drop_in_place(iter.into_slice()) };
+ let slice_ptr: *mut [T] = mem::replace(&mut self.iter, [].iter_mut()).into_slice();
+ unsafe { ptr::drop_in_place::<[T]>(slice_ptr) };
}
}
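
Both `Drop` impls now move the slice into a raw `*mut [T]` before calling `ptr::drop_in_place`. The raw pointer carries no lifetime, so the borrow of `self` ends before the drop call, and the explicit types make the intent checkable. A standalone sketch of the pattern; note that in rayon this is sound only because the surrounding `Vec` already had its length cut, so nothing is dropped twice:

    use std::{mem, ptr};

    struct DropSlice<'a, T> {
        slice: &'a mut [T],
    }

    impl<T> Drop for DropSlice<'_, T> {
        fn drop(&mut self) {
            // Take the slice out of `self` first; converting it to a raw
            // pointer sheds the `'a` borrow, then `Drop for [T]` runs each
            // element's destructor in place.
            let slice_ptr: *mut [T] = mem::take(&mut self.slice);
            unsafe { ptr::drop_in_place(slice_ptr) };
        }
    }
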
diff --git a/tests/clones.rs b/tests/clones.rs
index 9add775..0d6c864 100644
--- a/tests/clones.rs
+++ b/tests/clones.rs
@@ -10,6 +10,13 @@ where
assert_eq!(a, b);
}
+fn check_count<I>(iter: I)
+where
+ I: ParallelIterator + Clone,
+{
+ assert_eq!(iter.clone().count(), iter.count());
+}
+
#[test]
fn clone_binary_heap() {
use std::collections::BinaryHeap;
@@ -102,6 +109,8 @@ fn clone_vec() {
check(v.par_iter());
check(v.par_chunks(42));
check(v.par_chunks_exact(42));
+ check(v.par_rchunks(42));
+ check(v.par_rchunks_exact(42));
check(v.par_windows(42));
check(v.par_split(|x| x % 3 == 0));
check(v.into_par_iter());
@@ -128,6 +137,12 @@ fn clone_adaptors() {
check(v.par_iter().flatten_iter());
check(v.par_iter().with_max_len(1).fold(|| 0, |x, _| x));
check(v.par_iter().with_max_len(1).fold_with(0, |x, _| x));
+ check(v.par_iter().with_max_len(1).fold_chunks(1, || 0, |x, _| x));
+ check(
+ v.par_iter()
+ .with_max_len(1)
+ .fold_chunks_with(1, 0, |x, _| x),
+ );
check(v.par_iter().with_max_len(1).try_fold(|| 0, |_, &x| x));
check(v.par_iter().with_max_len(1).try_fold_with(0, |_, &x| x));
check(v.par_iter().inspect(|_| ()));
@@ -142,8 +157,10 @@ fn clone_adaptors() {
check(v.par_iter().panic_fuse());
check(v.par_iter().positions(|_| true));
check(v.par_iter().rev());
- check(v.par_iter().skip(1));
- check(v.par_iter().take(1));
+ check(v.par_iter().skip(42));
+ check(v.par_iter().skip_any_while(|_| false));
+ check(v.par_iter().take(42));
+ check(v.par_iter().take_any_while(|_| true));
check(v.par_iter().cloned().while_some());
check(v.par_iter().with_max_len(1));
check(v.par_iter().with_min_len(1));
@@ -153,6 +170,13 @@ fn clone_adaptors() {
}
#[test]
+fn clone_counted_adaptors() {
+ let v: Vec<_> = (0..1000).collect();
+ check_count(v.par_iter().skip_any(42));
+ check_count(v.par_iter().take_any(42));
+}
+
+#[test]
fn clone_empty() {
check(rayon::iter::empty::<i32>());
}
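
`check_count` exists because `skip_any` and `take_any` choose *which* items to drop or keep nondeterministically, so element-wise comparison against a clone would be flaky; only the resulting count is guaranteed. A small sketch of that guarantee:

    use rayon::prelude::*;

    fn main() {
        let v: Vec<i32> = (0..1000).collect();
        // Which 42 elements get skipped can vary run to run,
        // but exactly 42 are always skipped.
        assert_eq!(v.par_iter().skip_any(42).count(), v.len() - 42);
        // Likewise, take_any keeps some 42 elements.
        assert_eq!(v.par_iter().take_any(42).count(), 42);
    }
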
diff --git a/tests/collect.rs b/tests/collect.rs
index 48b80f6..bfb080c 100644
--- a/tests/collect.rs
+++ b/tests/collect.rs
@@ -6,6 +6,7 @@ use std::sync::atomic::Ordering;
use std::sync::Mutex;
#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
fn collect_drop_on_unwind() {
struct Recorddrop<'a>(i64, &'a Mutex<Vec<i64>>);
@@ -61,6 +62,7 @@ fn collect_drop_on_unwind() {
}
#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
fn collect_drop_on_unwind_zst() {
static INSERTS: AtomicUsize = AtomicUsize::new(0);
static DROPS: AtomicUsize = AtomicUsize::new(0);
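
These two tests count destructor calls during a panic, which only works when the crate is compiled with the unwinding panic strategy; under `-C panic=abort` a panic would end the whole test binary, so the tests are now marked ignored there. A toy example of the same gating, assuming a toolchain where `cfg(panic = "unwind")` is available (stable since Rust 1.60):

    // Ignored when built with a non-unwinding panic strategy,
    // e.g. RUSTFLAGS="-C panic=abort".
    #[test]
    #[cfg_attr(not(panic = "unwind"), ignore)]
    fn drops_run_during_unwind() {
        let caught = std::panic::catch_unwind(|| panic!("boom"));
        assert!(caught.is_err());
    }
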
diff --git a/tests/cross-pool.rs b/tests/cross-pool.rs
index f0a2128..835e2e2 100644
--- a/tests/cross-pool.rs
+++ b/tests/cross-pool.rs
@@ -2,6 +2,7 @@ use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn cross_pool_busy() {
let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
diff --git a/tests/debug.rs b/tests/debug.rs
index 3584ba0..14f3791 100644
--- a/tests/debug.rs
+++ b/tests/debug.rs
@@ -123,6 +123,10 @@ fn debug_vec() {
check(v.par_chunks_exact(42));
check(v.par_chunks_mut(42));
check(v.par_chunks_exact_mut(42));
+ check(v.par_rchunks(42));
+ check(v.par_rchunks_exact(42));
+ check(v.par_rchunks_mut(42));
+ check(v.par_rchunks_exact_mut(42));
check(v.par_windows(42));
check(v.par_split(|x| x % 3 == 0));
check(v.par_split_mut(|x| x % 3 == 0));
@@ -151,6 +155,8 @@ fn debug_adaptors() {
check(v.par_iter().map(Some).flatten_iter());
check(v.par_iter().fold(|| 0, |x, _| x));
check(v.par_iter().fold_with(0, |x, _| x));
+ check(v.par_iter().fold_chunks(3, || 0, |x, _| x));
+ check(v.par_iter().fold_chunks_with(3, 0, |x, _| x));
check(v.par_iter().try_fold(|| 0, |x, _| Some(x)));
check(v.par_iter().try_fold_with(0, |x, _| Some(x)));
check(v.par_iter().inspect(|_| ()));
@@ -166,7 +172,11 @@ fn debug_adaptors() {
check(v.par_iter().positions(|_| true));
check(v.par_iter().rev());
check(v.par_iter().skip(1));
+ check(v.par_iter().skip_any(1));
+ check(v.par_iter().skip_any_while(|_| false));
check(v.par_iter().take(1));
+ check(v.par_iter().take_any(1));
+ check(v.par_iter().take_any_while(|_| true));
check(v.par_iter().map(Some).while_some());
check(v.par_iter().with_max_len(1));
check(v.par_iter().with_min_len(1));
diff --git a/tests/drain_vec.rs b/tests/drain_vec.rs
new file mode 100644
index 0000000..08f1120
--- /dev/null
+++ b/tests/drain_vec.rs
@@ -0,0 +1,41 @@
+use rayon::prelude::*;
+
+#[test]
+fn drain_vec_yielded() {
+ let mut vec_org = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+ let yielded = vec_org.par_drain(0..5).collect::<Vec<_>>();
+
+ assert_eq!(&yielded, &[0, 1, 2, 3, 4]);
+ assert_eq!(&vec_org, &[5, 6, 7, 8, 9]);
+}
+
+#[test]
+fn drain_vec_dropped() {
+ let mut vec_org = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+ let yielded = vec_org.par_drain(0..5);
+
+ drop(yielded);
+ assert_eq!(&vec_org, &[5, 6, 7, 8, 9]);
+}
+
+#[test]
+fn drain_vec_empty_range_yielded() {
+ let mut vec_org = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+ let yielded = vec_org.par_drain(5..5).collect::<Vec<_>>();
+
+ assert_eq!(&yielded, &[]);
+ assert_eq!(&vec_org, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+}
+
+#[test]
+fn drain_vec_empty_range_dropped() {
+ let mut vec_org = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+
+ let yielded = vec_org.par_drain(5..5);
+
+ drop(yielded);
+ assert_eq!(&vec_org, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+}
diff --git a/tests/iter_panic.rs b/tests/iter_panic.rs
index 4885a28..c62a8db 100644
--- a/tests/iter_panic.rs
+++ b/tests/iter_panic.rs
@@ -20,6 +20,7 @@ fn iter_panic() {
}
#[test]
+#[cfg_attr(not(panic = "unwind"), ignore)]
fn iter_panic_fuse() {
// We only use a single thread in order to make the behavior
// of 'panic_fuse' deterministic
@@ -47,6 +48,6 @@ fn iter_panic_fuse() {
assert!(count(iter.clone().panic_fuse().inspect(check)) < expected);
// Try in reverse to be sure we hit the producer case.
- assert!(count(iter.clone().panic_fuse().inspect(check).rev()) < expected);
+ assert!(count(iter.panic_fuse().inspect(check).rev()) < expected);
});
}
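
`panic_fuse` propagates a panic across the pool: once any item panics, sibling workers stop taking new items, so the number of inspected items lands below the full length. A hedged sketch of the observable effect; the early-stop count is probabilistic, not exact:

    use rayon::prelude::*;
    use std::panic;
    use std::sync::atomic::{AtomicUsize, Ordering};

    static SEEN: AtomicUsize = AtomicUsize::new(0);

    fn main() {
        let result = panic::catch_unwind(|| {
            (0..1_000_000).into_par_iter().panic_fuse().for_each(|i| {
                SEEN.fetch_add(1, Ordering::Relaxed);
                if i == 0 {
                    panic!("boom");
                }
            });
        });
        assert!(result.is_err());
        // Typically far fewer than 1_000_000 items were seen, because
        // panic_fuse told the other workers to stop early.
        println!("inspected {}", SEEN.load(Ordering::Relaxed));
    }
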
diff --git a/tests/named-threads.rs b/tests/named-threads.rs
index fd1b0be..dadb37b 100644
--- a/tests/named-threads.rs
+++ b/tests/named-threads.rs
@@ -4,6 +4,7 @@ use rayon::prelude::*;
use rayon::*;
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn named_threads() {
ThreadPoolBuilder::new()
.thread_name(|i| format!("hello-name-test-{}", i))
diff --git a/tests/octillion.rs b/tests/octillion.rs
index cff2b11..1af9ad8 100644
--- a/tests/octillion.rs
+++ b/tests/octillion.rs
@@ -68,7 +68,14 @@ fn two_threads<F: Send + FnOnce() -> R, R: Send>(f: F) -> R {
}
#[test]
-#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+#[cfg_attr(
+ any(
+ not(target_pointer_width = "64"),
+ target_os = "emscripten",
+ target_family = "wasm"
+ ),
+ ignore
+)]
fn find_last_octillion() {
// It would be nice if `find_last` could prioritize the later splits,
// basically flipping the `join` args, without needing indexed `rev`.
@@ -78,32 +85,49 @@ fn find_last_octillion() {
}
#[test]
-#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+#[cfg_attr(
+ any(
+ not(target_pointer_width = "64"),
+ target_os = "emscripten",
+ target_family = "wasm"
+ ),
+ ignore
+)]
fn find_last_octillion_inclusive() {
let x = two_threads(|| octillion_inclusive().find_last(|_| true));
assert_eq!(x, Some(OCTILLION));
}
#[test]
-#[cfg_attr(not(target_pointer_width = "64"), ignore)]
+#[cfg_attr(
+ any(
+ not(target_pointer_width = "64"),
+ target_os = "emscripten",
+ target_family = "wasm"
+ ),
+ ignore
+)]
fn find_last_octillion_flat() {
let x = two_threads(|| octillion_flat().find_last(|_| true));
assert_eq!(x, Some(OCTILLION - 1));
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn find_any_octillion() {
let x = two_threads(|| octillion().find_any(|x| *x > OCTILLION / 2));
assert!(x.is_some());
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn find_any_octillion_flat() {
let x = two_threads(|| octillion_flat().find_any(|x| *x > OCTILLION / 2));
assert!(x.is_some());
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn filter_find_any_octillion() {
let x = two_threads(|| {
octillion()
@@ -114,6 +138,7 @@ fn filter_find_any_octillion() {
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn filter_find_any_octillion_flat() {
let x = two_threads(|| {
octillion_flat()
@@ -124,6 +149,7 @@ fn filter_find_any_octillion_flat() {
}
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn fold_find_any_octillion_flat() {
let x = two_threads(|| octillion_flat().fold(|| (), |_, _| ()).find_any(|_| true));
assert!(x.is_some());
diff --git a/tests/par_bridge_recursion.rs b/tests/par_bridge_recursion.rs
new file mode 100644
index 0000000..3a48ef1
--- /dev/null
+++ b/tests/par_bridge_recursion.rs
@@ -0,0 +1,31 @@
+use rayon::prelude::*;
+use std::iter::once_with;
+
+const N: usize = 100_000;
+
+#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
+fn par_bridge_recursion() {
+ let pool = rayon::ThreadPoolBuilder::new()
+ .num_threads(10)
+ .build()
+ .unwrap();
+
+ let seq: Vec<_> = (0..N).map(|i| (i, i.to_string())).collect();
+
+ pool.broadcast(|_| {
+ let mut par: Vec<_> = (0..N)
+ .into_par_iter()
+ .flat_map(|i| {
+ once_with(move || {
+ // Using rayon within the serial iterator creates an opportunity for
+ // work-stealing to make par_bridge's mutex accidentally recursive.
+ rayon::join(move || i, move || i.to_string())
+ })
+ .par_bridge()
+ })
+ .collect();
+ par.par_sort_unstable();
+ assert_eq!(seq, par);
+ });
+}
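
The in-test comment is the heart of this regression test: `par_bridge` feeds items out of a serial `Iterator` from behind an internal lock, and a worker blocked on that lock can steal a job that itself needs the same bridge, so the implementation must never hold the lock while executing stolen work. For contrast, a minimal sketch of ordinary `par_bridge` use over a non-indexed iterator:

    use rayon::prelude::*;

    fn main() {
        // par_bridge parallelizes any Send iterator; items are handed to
        // worker threads in no particular order.
        let mut squares: Vec<u64> = (1..=100u64)
            .map(|i| i * i) // still serial at this point
            .par_bridge()   // fan out to the thread pool
            .collect();
        squares.sort_unstable();
        assert_eq!((squares[0], squares[99]), (1, 10_000));
    }
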
diff --git a/tests/producer_split_at.rs b/tests/producer_split_at.rs
index 3a49059..d710504 100644
--- a/tests/producer_split_at.rs
+++ b/tests/producer_split_at.rs
@@ -214,6 +214,50 @@ fn slice_chunks_exact_mut() {
}
#[test]
+fn slice_rchunks() {
+ let s: Vec<_> = (0..10).collect();
+ for len in 1..s.len() + 2 {
+ let v: Vec<_> = s.rchunks(len).collect();
+ check(&v, || s.par_rchunks(len));
+ }
+}
+
+#[test]
+fn slice_rchunks_exact() {
+ let s: Vec<_> = (0..10).collect();
+ for len in 1..s.len() + 2 {
+ let v: Vec<_> = s.rchunks_exact(len).collect();
+ check(&v, || s.par_rchunks_exact(len));
+ }
+}
+
+#[test]
+fn slice_rchunks_mut() {
+ let mut s: Vec<_> = (0..10).collect();
+ let mut v: Vec<_> = s.clone();
+ for len in 1..s.len() + 2 {
+ let expected: Vec<_> = v.rchunks_mut(len).collect();
+ map_triples(expected.len() + 1, |i, j, k| {
+ Split::forward(s.par_rchunks_mut(len), i, j, k, &expected);
+ Split::reverse(s.par_rchunks_mut(len), i, j, k, &expected);
+ });
+ }
+}
+
+#[test]
+fn slice_rchunks_exact_mut() {
+ let mut s: Vec<_> = (0..10).collect();
+ let mut v: Vec<_> = s.clone();
+ for len in 1..s.len() + 2 {
+ let expected: Vec<_> = v.rchunks_exact_mut(len).collect();
+ map_triples(expected.len() + 1, |i, j, k| {
+ Split::forward(s.par_rchunks_exact_mut(len), i, j, k, &expected);
+ Split::reverse(s.par_rchunks_exact_mut(len), i, j, k, &expected);
+ });
+ }
+}
+
+#[test]
fn slice_windows() {
let s: Vec<_> = (0..10).collect();
let v: Vec<_> = s.windows(2).collect();
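
The four new cases mirror each serial `rchunks*` method against its parallel counterpart for every chunk length, including lengths longer than the slice. The basic correspondence being exercised looks like this:

    use rayon::prelude::*;

    fn main() {
        let s: Vec<i32> = (0..10).collect();
        // Chunks are taken from the back: [7,8,9], [4,5,6], [1,2,3], [0].
        let serial: Vec<&[i32]> = s.rchunks(3).collect();
        let parallel: Vec<&[i32]> = s.par_rchunks(3).collect();
        assert_eq!(serial, parallel);
        // The `_exact` variant drops the short remainder at the front.
        assert_eq!(s.par_rchunks_exact(3).count(), 3);
    }
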
diff --git a/tests/sort-panic-safe.rs b/tests/sort-panic-safe.rs
index 00a9731..95ef88d 100644
--- a/tests/sort-panic-safe.rs
+++ b/tests/sort-panic-safe.rs
@@ -8,11 +8,12 @@ use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::Relaxed;
use std::thread;
-static VERSIONS: AtomicUsize = AtomicUsize::new(0);
+const ZERO: AtomicUsize = AtomicUsize::new(0);
+const LEN: usize = 20_000;
-lazy_static::lazy_static! {
- static ref DROP_COUNTS: Vec<AtomicUsize> = (0..20_000).map(|_| AtomicUsize::new(0)).collect();
-}
+static VERSIONS: AtomicUsize = ZERO;
+
+static DROP_COUNTS: [AtomicUsize; LEN] = [ZERO; LEN];
#[derive(Clone, Eq)]
struct DropCounter {
@@ -117,6 +118,7 @@ macro_rules! test {
thread_local!(static SILENCE_PANIC: Cell<bool> = Cell::new(false));
#[test]
+#[cfg_attr(any(target_os = "emscripten", target_family = "wasm"), ignore)]
fn sort_panic_safe() {
let prev = panic::take_hook();
panic::set_hook(Box::new(move |info| {
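
The `lazy_static` dependency goes away because an array-repeat initializer `[ZERO; LEN]` accepts a named `const` even for a non-`Copy` type like `AtomicUsize`: the constant is instantiated afresh for each element. The pattern in isolation:

    use std::sync::atomic::{AtomicUsize, Ordering};

    // A const item can seed an array-repeat expression even though
    // AtomicUsize is not Copy; every slot gets its own AtomicUsize::new(0).
    const ZERO: AtomicUsize = AtomicUsize::new(0);
    static COUNTERS: [AtomicUsize; 4] = [ZERO; 4];

    fn main() {
        COUNTERS[2].fetch_add(1, Ordering::Relaxed);
        assert_eq!(COUNTERS[2].load(Ordering::Relaxed), 1);
        assert_eq!(COUNTERS[0].load(Ordering::Relaxed), 0);
    }
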
diff --git a/tests/str.rs b/tests/str.rs
index 0e1e35e..f73a261 100644
--- a/tests/str.rs
+++ b/tests/str.rs
@@ -64,6 +64,11 @@ pub fn execute_strings_split() {
let parallel: Vec<_> = string.par_split(separator).collect();
assert_eq!(serial, parallel);
+ let pattern: &[char] = &['\u{0}', separator, '\u{1F980}'];
+ let serial: Vec<_> = string.split(pattern).collect();
+ let parallel: Vec<_> = string.par_split(pattern).collect();
+ assert_eq!(serial, parallel);
+
let serial_fn: Vec<_> = string.split(|c| c == separator).collect();
let parallel_fn: Vec<_> = string.par_split(|c| c == separator).collect();
assert_eq!(serial_fn, parallel_fn);
@@ -73,9 +78,12 @@ pub fn execute_strings_split() {
let serial: Vec<_> = string.split_terminator(separator).collect();
let parallel: Vec<_> = string.par_split_terminator(separator).collect();
assert_eq!(serial, parallel);
- }
- for &(string, separator) in &tests {
+ let pattern: &[char] = &['\u{0}', separator, '\u{1F980}'];
+ let serial: Vec<_> = string.split_terminator(pattern).collect();
+ let parallel: Vec<_> = string.par_split_terminator(pattern).collect();
+ assert_eq!(serial, parallel);
+
let serial: Vec<_> = string.split_terminator(|c| c == separator).collect();
let parallel: Vec<_> = string.par_split_terminator(|c| c == separator).collect();
assert_eq!(serial, parallel);
@@ -99,6 +107,11 @@ pub fn execute_strings_split() {
let parallel: Vec<_> = string.par_matches(separator).collect();
assert_eq!(serial, parallel);
+ let pattern: &[char] = &['\u{0}', separator, '\u{1F980}'];
+ let serial: Vec<_> = string.matches(pattern).collect();
+ let parallel: Vec<_> = string.par_matches(pattern).collect();
+ assert_eq!(serial, parallel);
+
let serial_fn: Vec<_> = string.matches(|c| c == separator).collect();
let parallel_fn: Vec<_> = string.par_matches(|c| c == separator).collect();
assert_eq!(serial_fn, parallel_fn);
@@ -109,6 +122,11 @@ pub fn execute_strings_split() {
let parallel: Vec<_> = string.par_match_indices(separator).collect();
assert_eq!(serial, parallel);
+ let pattern: &[char] = &['\u{0}', separator, '\u{1F980}'];
+ let serial: Vec<_> = string.match_indices(pattern).collect();
+ let parallel: Vec<_> = string.par_match_indices(pattern).collect();
+ assert_eq!(serial, parallel);
+
let serial_fn: Vec<_> = string.match_indices(|c| c == separator).collect();
let parallel_fn: Vec<_> = string.par_match_indices(|c| c == separator).collect();
assert_eq!(serial_fn, parallel_fn);
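
Each splitter and matcher test gains a `&[char]` case: a slice-of-chars pattern matches any one character from the set, and rayon's parallel string methods now accept it just as `std`'s do. A brief sketch of the equivalence these tests assert:

    use rayon::prelude::*;

    fn main() {
        let s = "a,b;c,d";
        // A &[char] pattern matches any of the listed characters.
        let pattern: &[char] = &[',', ';'];
        let serial: Vec<&str> = s.split(pattern).collect();
        let parallel: Vec<&str> = s.par_split(pattern).collect();
        assert_eq!(serial, parallel);
        assert_eq!(serial, ["a", "b", "c", "d"]);
    }
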