From 041839ceabbc67165512fde0d33c91347b758487 Mon Sep 17 00:00:00 2001 From: Jakub Kotur Date: Mon, 21 Dec 2020 17:28:15 +0100 Subject: Initial import of rayon-1.5.0. Bug: 155309706 Change-Id: I6ff7de1cb89d093d7938abf78d586ed76da85b0d --- src/collections/binary_heap.rs | 120 + src/collections/btree_map.rs | 66 + src/collections/btree_set.rs | 52 + src/collections/hash_map.rs | 96 + src/collections/hash_set.rs | 80 + src/collections/linked_list.rs | 66 + src/collections/mod.rs | 84 + src/collections/vec_deque.rs | 159 ++ src/compile_fail/cannot_collect_filtermap_data.rs | 16 + src/compile_fail/cannot_zip_filtered_data.rs | 16 + src/compile_fail/cell_par_iter.rs | 15 + src/compile_fail/mod.rs | 7 + src/compile_fail/must_use.rs | 67 + src/compile_fail/no_send_par_iter.rs | 64 + src/compile_fail/rc_par_iter.rs | 17 + src/delegate.rs | 70 + src/iter/chain.rs | 268 ++ src/iter/chunks.rs | 216 ++ src/iter/cloned.rs | 223 ++ src/iter/collect/consumer.rs | 159 ++ src/iter/collect/mod.rs | 171 ++ src/iter/collect/test.rs | 385 +++ src/iter/copied.rs | 223 ++ src/iter/empty.rs | 104 + src/iter/enumerate.rs | 133 + src/iter/extend.rs | 376 +++ src/iter/filter.rs | 141 + src/iter/filter_map.rs | 142 + src/iter/find.rs | 120 + src/iter/find_first_last/mod.rs | 238 ++ src/iter/find_first_last/test.rs | 106 + src/iter/flat_map.rs | 154 + src/iter/flat_map_iter.rs | 147 + src/iter/flatten.rs | 140 + src/iter/flatten_iter.rs | 132 + src/iter/fold.rs | 302 ++ src/iter/for_each.rs | 77 + src/iter/from_par_iter.rs | 228 ++ src/iter/inspect.rs | 257 ++ src/iter/interleave.rs | 336 +++ src/iter/interleave_shortest.rs | 85 + src/iter/intersperse.rs | 410 +++ src/iter/len.rs | 271 ++ src/iter/map.rs | 259 ++ src/iter/map_with.rs | 573 ++++ src/iter/mod.rs | 3121 +++++++++++++++++++++ src/iter/multizip.rs | 338 +++ src/iter/noop.rs | 59 + src/iter/once.rs | 68 + src/iter/panic_fuse.rs | 342 +++ src/iter/par_bridge.rs | 201 ++ src/iter/plumbing/README.md | 315 +++ src/iter/plumbing/mod.rs | 
484 ++++ src/iter/positions.rs | 137 + src/iter/product.rs | 114 + src/iter/reduce.rs | 116 + src/iter/repeat.rs | 241 ++ src/iter/rev.rs | 123 + src/iter/skip.rs | 88 + src/iter/splitter.rs | 174 ++ src/iter/step_by.rs | 144 + src/iter/sum.rs | 110 + src/iter/take.rs | 86 + src/iter/test.rs | 2188 +++++++++++++++ src/iter/try_fold.rs | 294 ++ src/iter/try_reduce.rs | 129 + src/iter/try_reduce_with.rs | 134 + src/iter/unzip.rs | 464 +++ src/iter/update.rs | 327 +++ src/iter/while_some.rs | 154 + src/iter/zip.rs | 159 ++ src/iter/zip_eq.rs | 72 + src/lib.rs | 120 + src/math.rs | 54 + src/option.rs | 203 ++ src/par_either.rs | 74 + src/prelude.rs | 17 + src/private.rs | 26 + src/range.rs | 368 +++ src/range_inclusive.rs | 288 ++ src/result.rs | 132 + src/slice/mergesort.rs | 763 +++++ src/slice/mod.rs | 1203 ++++++++ src/slice/quicksort.rs | 800 ++++++ src/slice/test.rs | 148 + src/split_producer.rs | 132 + src/str.rs | 874 ++++++ src/string.rs | 48 + src/vec.rs | 245 ++ 89 files changed, 23048 insertions(+) create mode 100644 src/collections/binary_heap.rs create mode 100644 src/collections/btree_map.rs create mode 100644 src/collections/btree_set.rs create mode 100644 src/collections/hash_map.rs create mode 100644 src/collections/hash_set.rs create mode 100644 src/collections/linked_list.rs create mode 100644 src/collections/mod.rs create mode 100644 src/collections/vec_deque.rs create mode 100644 src/compile_fail/cannot_collect_filtermap_data.rs create mode 100644 src/compile_fail/cannot_zip_filtered_data.rs create mode 100644 src/compile_fail/cell_par_iter.rs create mode 100644 src/compile_fail/mod.rs create mode 100644 src/compile_fail/must_use.rs create mode 100644 src/compile_fail/no_send_par_iter.rs create mode 100644 src/compile_fail/rc_par_iter.rs create mode 100644 src/delegate.rs create mode 100644 src/iter/chain.rs create mode 100644 src/iter/chunks.rs create mode 100644 src/iter/cloned.rs create mode 100644 src/iter/collect/consumer.rs create mode 
100644 src/iter/collect/mod.rs create mode 100644 src/iter/collect/test.rs create mode 100644 src/iter/copied.rs create mode 100644 src/iter/empty.rs create mode 100644 src/iter/enumerate.rs create mode 100644 src/iter/extend.rs create mode 100644 src/iter/filter.rs create mode 100644 src/iter/filter_map.rs create mode 100644 src/iter/find.rs create mode 100644 src/iter/find_first_last/mod.rs create mode 100644 src/iter/find_first_last/test.rs create mode 100644 src/iter/flat_map.rs create mode 100644 src/iter/flat_map_iter.rs create mode 100644 src/iter/flatten.rs create mode 100644 src/iter/flatten_iter.rs create mode 100644 src/iter/fold.rs create mode 100644 src/iter/for_each.rs create mode 100644 src/iter/from_par_iter.rs create mode 100644 src/iter/inspect.rs create mode 100644 src/iter/interleave.rs create mode 100644 src/iter/interleave_shortest.rs create mode 100644 src/iter/intersperse.rs create mode 100644 src/iter/len.rs create mode 100644 src/iter/map.rs create mode 100644 src/iter/map_with.rs create mode 100644 src/iter/mod.rs create mode 100644 src/iter/multizip.rs create mode 100644 src/iter/noop.rs create mode 100644 src/iter/once.rs create mode 100644 src/iter/panic_fuse.rs create mode 100644 src/iter/par_bridge.rs create mode 100644 src/iter/plumbing/README.md create mode 100644 src/iter/plumbing/mod.rs create mode 100644 src/iter/positions.rs create mode 100644 src/iter/product.rs create mode 100644 src/iter/reduce.rs create mode 100644 src/iter/repeat.rs create mode 100644 src/iter/rev.rs create mode 100644 src/iter/skip.rs create mode 100644 src/iter/splitter.rs create mode 100644 src/iter/step_by.rs create mode 100644 src/iter/sum.rs create mode 100644 src/iter/take.rs create mode 100644 src/iter/test.rs create mode 100644 src/iter/try_fold.rs create mode 100644 src/iter/try_reduce.rs create mode 100644 src/iter/try_reduce_with.rs create mode 100644 src/iter/unzip.rs create mode 100644 src/iter/update.rs create mode 100644 
src/iter/while_some.rs create mode 100644 src/iter/zip.rs create mode 100644 src/iter/zip_eq.rs create mode 100644 src/lib.rs create mode 100644 src/math.rs create mode 100644 src/option.rs create mode 100644 src/par_either.rs create mode 100644 src/prelude.rs create mode 100644 src/private.rs create mode 100644 src/range.rs create mode 100644 src/range_inclusive.rs create mode 100644 src/result.rs create mode 100644 src/slice/mergesort.rs create mode 100644 src/slice/mod.rs create mode 100644 src/slice/quicksort.rs create mode 100644 src/slice/test.rs create mode 100644 src/split_producer.rs create mode 100644 src/str.rs create mode 100644 src/string.rs create mode 100644 src/vec.rs (limited to 'src') diff --git a/src/collections/binary_heap.rs b/src/collections/binary_heap.rs new file mode 100644 index 0000000..fa90312 --- /dev/null +++ b/src/collections/binary_heap.rs @@ -0,0 +1,120 @@ +//! This module contains the parallel iterator types for heaps +//! (`BinaryHeap`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::BinaryHeap; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a binary heap +#[derive(Debug, Clone)] +pub struct IntoIter { + inner: vec::IntoIter, +} + +impl IntoParallelIterator for BinaryHeap { + type Item = T; + type Iter = IntoIter; + + fn into_par_iter(self) -> Self::Iter { + IntoIter { + inner: Vec::from(self).into_par_iter(), + } + } +} + +delegate_indexed_iterator! { + IntoIter => T, + impl +} + +/// Parallel iterator over an immutable reference to a binary heap +#[derive(Debug)] +pub struct Iter<'a, T: Ord + Sync> { + inner: vec::IntoIter<&'a T>, +} + +impl<'a, T: Ord + Sync> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a BinaryHeap => Iter<'a, T>, + impl<'a, T: Ord + Sync> +} + +delegate_indexed_iterator! 
{ + Iter<'a, T> => &'a T, + impl<'a, T: Ord + Sync + 'a> +} + +// `BinaryHeap` doesn't have a mutable `Iterator` + +/// Draining parallel iterator that moves out of a binary heap, +/// but keeps the total capacity. +#[derive(Debug)] +pub struct Drain<'a, T: Ord + Send> { + heap: &'a mut BinaryHeap, +} + +impl<'a, T: Ord + Send> ParallelDrainFull for &'a mut BinaryHeap { + type Iter = Drain<'a, T>; + type Item = T; + + fn par_drain(self) -> Self::Iter { + Drain { heap: self } + } +} + +impl<'a, T: Ord + Send> ParallelIterator for Drain<'a, T> { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'a, T: Ord + Send> IndexedParallelIterator for Drain<'a, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.heap.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + super::DrainGuard::new(self.heap) + .par_drain(..) + .with_producer(callback) + } +} + +impl<'a, T: Ord + Send> Drop for Drain<'a, T> { + fn drop(&mut self) { + if !self.heap.is_empty() { + // We must not have produced, so just call a normal drain to remove the items. + self.heap.drain(); + } + } +} diff --git a/src/collections/btree_map.rs b/src/collections/btree_map.rs new file mode 100644 index 0000000..12436dc --- /dev/null +++ b/src/collections/btree_map.rs @@ -0,0 +1,66 @@ +//! This module contains the parallel iterator types for B-Tree maps +//! (`BTreeMap`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. 
+ +use std::collections::BTreeMap; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a B-Tree map +#[derive(Debug)] // std doesn't Clone +pub struct IntoIter { + inner: vec::IntoIter<(K, V)>, +} + +into_par_vec! { + BTreeMap => IntoIter, + impl +} + +delegate_iterator! { + IntoIter => (K, V), + impl +} + +/// Parallel iterator over an immutable reference to a B-Tree map +#[derive(Debug)] +pub struct Iter<'a, K: Ord + Sync, V: Sync> { + inner: vec::IntoIter<(&'a K, &'a V)>, +} + +impl<'a, K: Ord + Sync, V: Sync> Clone for Iter<'a, K, V> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a BTreeMap => Iter<'a, K, V>, + impl<'a, K: Ord + Sync, V: Sync> +} + +delegate_iterator! { + Iter<'a, K, V> => (&'a K, &'a V), + impl<'a, K: Ord + Sync + 'a, V: Sync + 'a> +} + +/// Parallel iterator over a mutable reference to a B-Tree map +#[derive(Debug)] +pub struct IterMut<'a, K: Ord + Sync, V: Send> { + inner: vec::IntoIter<(&'a K, &'a mut V)>, +} + +into_par_vec! { + &'a mut BTreeMap => IterMut<'a, K, V>, + impl<'a, K: Ord + Sync, V: Send> +} + +delegate_iterator! { + IterMut<'a, K, V> => (&'a K, &'a mut V), + impl<'a, K: Ord + Sync + 'a, V: Send + 'a> +} diff --git a/src/collections/btree_set.rs b/src/collections/btree_set.rs new file mode 100644 index 0000000..061d37c --- /dev/null +++ b/src/collections/btree_set.rs @@ -0,0 +1,52 @@ +//! This module contains the parallel iterator types for B-Tree sets +//! (`BTreeSet`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::BTreeSet; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a B-Tree set +#[derive(Debug)] // std doesn't Clone +pub struct IntoIter { + inner: vec::IntoIter, +} + +into_par_vec! { + BTreeSet => IntoIter, + impl +} + +delegate_iterator! 
{ + IntoIter => T, + impl +} + +/// Parallel iterator over an immutable reference to a B-Tree set +#[derive(Debug)] +pub struct Iter<'a, T: Ord + Sync> { + inner: vec::IntoIter<&'a T>, +} + +impl<'a, T: Ord + Sync + 'a> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a BTreeSet => Iter<'a, T>, + impl<'a, T: Ord + Sync> +} + +delegate_iterator! { + Iter<'a, T> => &'a T, + impl<'a, T: Ord + Sync + 'a> +} + +// `BTreeSet` doesn't have a mutable `Iterator` diff --git a/src/collections/hash_map.rs b/src/collections/hash_map.rs new file mode 100644 index 0000000..b657851 --- /dev/null +++ b/src/collections/hash_map.rs @@ -0,0 +1,96 @@ +//! This module contains the parallel iterator types for hash maps +//! (`HashMap`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::HashMap; +use std::hash::{BuildHasher, Hash}; +use std::marker::PhantomData; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a hash map +#[derive(Debug)] // std doesn't Clone +pub struct IntoIter { + inner: vec::IntoIter<(K, V)>, +} + +into_par_vec! { + HashMap => IntoIter, + impl +} + +delegate_iterator! { + IntoIter => (K, V), + impl +} + +/// Parallel iterator over an immutable reference to a hash map +#[derive(Debug)] +pub struct Iter<'a, K: Hash + Eq + Sync, V: Sync> { + inner: vec::IntoIter<(&'a K, &'a V)>, +} + +impl<'a, K: Hash + Eq + Sync, V: Sync> Clone for Iter<'a, K, V> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a HashMap => Iter<'a, K, V>, + impl<'a, K: Hash + Eq + Sync, V: Sync, S: BuildHasher> +} + +delegate_iterator! 
{ + Iter<'a, K, V> => (&'a K, &'a V), + impl<'a, K: Hash + Eq + Sync + 'a, V: Sync + 'a> +} + +/// Parallel iterator over a mutable reference to a hash map +#[derive(Debug)] +pub struct IterMut<'a, K: Hash + Eq + Sync, V: Send> { + inner: vec::IntoIter<(&'a K, &'a mut V)>, +} + +into_par_vec! { + &'a mut HashMap => IterMut<'a, K, V>, + impl<'a, K: Hash + Eq + Sync, V: Send, S: BuildHasher> +} + +delegate_iterator! { + IterMut<'a, K, V> => (&'a K, &'a mut V), + impl<'a, K: Hash + Eq + Sync + 'a, V: Send + 'a> +} + +/// Draining parallel iterator that moves out of a hash map, +/// but keeps the total capacity. +#[derive(Debug)] +pub struct Drain<'a, K: Hash + Eq + Send, V: Send> { + inner: vec::IntoIter<(K, V)>, + marker: PhantomData<&'a mut HashMap>, +} + +impl<'a, K: Hash + Eq + Send, V: Send, S: BuildHasher> ParallelDrainFull + for &'a mut HashMap +{ + type Iter = Drain<'a, K, V>; + type Item = (K, V); + + fn par_drain(self) -> Self::Iter { + let vec: Vec<_> = self.drain().collect(); + Drain { + inner: vec.into_par_iter(), + marker: PhantomData, + } + } +} + +delegate_iterator! { + Drain<'_, K, V> => (K, V), + impl +} diff --git a/src/collections/hash_set.rs b/src/collections/hash_set.rs new file mode 100644 index 0000000..b6ee1c1 --- /dev/null +++ b/src/collections/hash_set.rs @@ -0,0 +1,80 @@ +//! This module contains the parallel iterator types for hash sets +//! (`HashSet`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::HashSet; +use std::hash::{BuildHasher, Hash}; +use std::marker::PhantomData; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a hash set +#[derive(Debug)] // std doesn't Clone +pub struct IntoIter { + inner: vec::IntoIter, +} + +into_par_vec! { + HashSet => IntoIter, + impl +} + +delegate_iterator! 
{ + IntoIter => T, + impl +} + +/// Parallel iterator over an immutable reference to a hash set +#[derive(Debug)] +pub struct Iter<'a, T: Hash + Eq + Sync> { + inner: vec::IntoIter<&'a T>, +} + +impl<'a, T: Hash + Eq + Sync> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a HashSet => Iter<'a, T>, + impl<'a, T: Hash + Eq + Sync, S: BuildHasher> +} + +delegate_iterator! { + Iter<'a, T> => &'a T, + impl<'a, T: Hash + Eq + Sync + 'a> +} + +// `HashSet` doesn't have a mutable `Iterator` + +/// Draining parallel iterator that moves out of a hash set, +/// but keeps the total capacity. +#[derive(Debug)] +pub struct Drain<'a, T: Hash + Eq + Send> { + inner: vec::IntoIter, + marker: PhantomData<&'a mut HashSet>, +} + +impl<'a, T: Hash + Eq + Send, S: BuildHasher> ParallelDrainFull for &'a mut HashSet { + type Iter = Drain<'a, T>; + type Item = T; + + fn par_drain(self) -> Self::Iter { + let vec: Vec<_> = self.drain().collect(); + Drain { + inner: vec.into_par_iter(), + marker: PhantomData, + } + } +} + +delegate_iterator! { + Drain<'_, T> => T, + impl +} diff --git a/src/collections/linked_list.rs b/src/collections/linked_list.rs new file mode 100644 index 0000000..bddd2b0 --- /dev/null +++ b/src/collections/linked_list.rs @@ -0,0 +1,66 @@ +//! This module contains the parallel iterator types for linked lists +//! (`LinkedList`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::LinkedList; + +use crate::iter::plumbing::*; +use crate::iter::*; + +use crate::vec; + +/// Parallel iterator over a linked list +#[derive(Debug, Clone)] +pub struct IntoIter { + inner: vec::IntoIter, +} + +into_par_vec! { + LinkedList => IntoIter, + impl +} + +delegate_iterator! 
{ + IntoIter => T, + impl +} + +/// Parallel iterator over an immutable reference to a linked list +#[derive(Debug)] +pub struct Iter<'a, T: Sync> { + inner: vec::IntoIter<&'a T>, +} + +impl<'a, T: Sync> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +into_par_vec! { + &'a LinkedList => Iter<'a, T>, + impl<'a, T: Sync> +} + +delegate_iterator! { + Iter<'a, T> => &'a T, + impl<'a, T: Sync + 'a> +} + +/// Parallel iterator over a mutable reference to a linked list +#[derive(Debug)] +pub struct IterMut<'a, T: Send> { + inner: vec::IntoIter<&'a mut T>, +} + +into_par_vec! { + &'a mut LinkedList => IterMut<'a, T>, + impl<'a, T: Send> +} + +delegate_iterator! { + IterMut<'a, T> => &'a mut T, + impl<'a, T: Send + 'a> +} diff --git a/src/collections/mod.rs b/src/collections/mod.rs new file mode 100644 index 0000000..d9b7988 --- /dev/null +++ b/src/collections/mod.rs @@ -0,0 +1,84 @@ +//! Parallel iterator types for [standard collections][std::collections] +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! [std::collections]: https://doc.rust-lang.org/stable/std/collections/ + +/// Convert an iterable collection into a parallel iterator by first +/// collecting into a temporary `Vec`, then iterating that. +macro_rules! 
into_par_vec { + ($t:ty => $iter:ident<$($i:tt),*>, impl $($args:tt)*) => { + impl $($args)* IntoParallelIterator for $t { + type Item = <$t as IntoIterator>::Item; + type Iter = $iter<$($i),*>; + + fn into_par_iter(self) -> Self::Iter { + use std::iter::FromIterator; + $iter { inner: Vec::from_iter(self).into_par_iter() } + } + } + }; +} + +pub mod binary_heap; +pub mod btree_map; +pub mod btree_set; +pub mod hash_map; +pub mod hash_set; +pub mod linked_list; +pub mod vec_deque; + +use self::drain_guard::DrainGuard; + +mod drain_guard { + use crate::iter::ParallelDrainRange; + use std::mem; + use std::ops::RangeBounds; + + /// A proxy for draining a collection by converting to a `Vec` and back. + /// + /// This is used for draining `BinaryHeap` and `VecDeque`, which both have + /// zero-allocation conversions to/from `Vec`, though not zero-cost: + /// - `BinaryHeap` will heapify from `Vec`, but at least that will be empty. + /// - `VecDeque` has to shift items to offset 0 when converting to `Vec`. + #[allow(missing_debug_implementations)] + pub(super) struct DrainGuard<'a, T, C: From>> { + collection: &'a mut C, + vec: Vec, + } + + impl<'a, T, C> DrainGuard<'a, T, C> + where + C: Default + From>, + Vec: From, + { + pub(super) fn new(collection: &'a mut C) -> Self { + Self { + // Temporarily steal the inner `Vec` so we can drain in place. + vec: Vec::from(mem::replace(collection, C::default())), + collection, + } + } + } + + impl<'a, T, C: From>> Drop for DrainGuard<'a, T, C> { + fn drop(&mut self) { + // Restore the collection from the `Vec` with its original capacity. 
+ *self.collection = C::from(mem::replace(&mut self.vec, Vec::new())); + } + } + + impl<'a, T, C> ParallelDrainRange for &'a mut DrainGuard<'_, T, C> + where + T: Send, + C: From>, + { + type Iter = crate::vec::Drain<'a, T>; + type Item = T; + + fn par_drain>(self, range: R) -> Self::Iter { + self.vec.par_drain(range) + } + } +} diff --git a/src/collections/vec_deque.rs b/src/collections/vec_deque.rs new file mode 100644 index 0000000..f87ce6b --- /dev/null +++ b/src/collections/vec_deque.rs @@ -0,0 +1,159 @@ +//! This module contains the parallel iterator types for double-ended queues +//! (`VecDeque`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use std::collections::VecDeque; +use std::ops::{Range, RangeBounds}; + +use crate::iter::plumbing::*; +use crate::iter::*; +use crate::math::simplify_range; + +use crate::slice; +use crate::vec; + +/// Parallel iterator over a double-ended queue +#[derive(Debug, Clone)] +pub struct IntoIter { + inner: vec::IntoIter, +} + +impl IntoParallelIterator for VecDeque { + type Item = T; + type Iter = IntoIter; + + fn into_par_iter(self) -> Self::Iter { + // NOTE: requires data movement if the deque doesn't start at offset 0. + let inner = Vec::from(self).into_par_iter(); + IntoIter { inner } + } +} + +delegate_indexed_iterator! { + IntoIter => T, + impl +} + +/// Parallel iterator over an immutable reference to a double-ended queue +#[derive(Debug)] +pub struct Iter<'a, T: Sync> { + inner: Chain, slice::Iter<'a, T>>, +} + +impl<'a, T: Sync> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +impl<'a, T: Sync> IntoParallelIterator for &'a VecDeque { + type Item = &'a T; + type Iter = Iter<'a, T>; + + fn into_par_iter(self) -> Self::Iter { + let (a, b) = self.as_slices(); + Iter { + inner: a.into_par_iter().chain(b), + } + } +} + +delegate_indexed_iterator! 
{ + Iter<'a, T> => &'a T, + impl<'a, T: Sync + 'a> +} + +/// Parallel iterator over a mutable reference to a double-ended queue +#[derive(Debug)] +pub struct IterMut<'a, T: Send> { + inner: Chain, slice::IterMut<'a, T>>, +} + +impl<'a, T: Send> IntoParallelIterator for &'a mut VecDeque { + type Item = &'a mut T; + type Iter = IterMut<'a, T>; + + fn into_par_iter(self) -> Self::Iter { + let (a, b) = self.as_mut_slices(); + IterMut { + inner: a.into_par_iter().chain(b), + } + } +} + +delegate_indexed_iterator! { + IterMut<'a, T> => &'a mut T, + impl<'a, T: Send + 'a> +} + +/// Draining parallel iterator that moves a range out of a double-ended queue, +/// but keeps the total capacity. +#[derive(Debug)] +pub struct Drain<'a, T: Send> { + deque: &'a mut VecDeque, + range: Range, + orig_len: usize, +} + +impl<'a, T: Send> ParallelDrainRange for &'a mut VecDeque { + type Iter = Drain<'a, T>; + type Item = T; + + fn par_drain>(self, range: R) -> Self::Iter { + Drain { + orig_len: self.len(), + range: simplify_range(range, self.len()), + deque: self, + } + } +} + +impl<'a, T: Send> ParallelIterator for Drain<'a, T> { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'a, T: Send> IndexedParallelIterator for Drain<'a, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.range.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + // NOTE: requires data movement if the deque doesn't start at offset 0. 
+ super::DrainGuard::new(self.deque) + .par_drain(self.range.clone()) + .with_producer(callback) + } +} + +impl<'a, T: Send> Drop for Drain<'a, T> { + fn drop(&mut self) { + if self.deque.len() != self.orig_len - self.range.len() { + // We must not have produced, so just call a normal drain to remove the items. + assert_eq!(self.deque.len(), self.orig_len); + self.deque.drain(self.range.clone()); + } + } +} diff --git a/src/compile_fail/cannot_collect_filtermap_data.rs b/src/compile_fail/cannot_collect_filtermap_data.rs new file mode 100644 index 0000000..65ff875 --- /dev/null +++ b/src/compile_fail/cannot_collect_filtermap_data.rs @@ -0,0 +1,16 @@ +/*! ```compile_fail,E0599 + +use rayon::prelude::*; + +// zip requires data of exact size, but filter yields only bounded +// size, so check that we cannot apply it. + +fn main() { + let a: Vec = (0..1024).collect(); + let mut v = vec![]; + a.par_iter() + .filter_map(|&x| Some(x as f32)) + .collect_into_vec(&mut v); //~ ERROR no method +} + +``` */ diff --git a/src/compile_fail/cannot_zip_filtered_data.rs b/src/compile_fail/cannot_zip_filtered_data.rs new file mode 100644 index 0000000..de43fca --- /dev/null +++ b/src/compile_fail/cannot_zip_filtered_data.rs @@ -0,0 +1,16 @@ +/*! ```compile_fail,E0277 + +use rayon::prelude::*; + +// zip requires data of exact size, but filter yields only bounded +// size, so check that we cannot apply it. + +fn main() { + let mut a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + a.par_iter() + .zip(b.par_iter().filter(|&&x| x > 3)); //~ ERROR +} + +``` */ diff --git a/src/compile_fail/cell_par_iter.rs b/src/compile_fail/cell_par_iter.rs new file mode 100644 index 0000000..4af04b1 --- /dev/null +++ b/src/compile_fail/cell_par_iter.rs @@ -0,0 +1,15 @@ +/*! ```compile_fail,E0277 + +// Check that we can't use the par-iter API to access contents of a `Cell`. 
+ +use rayon::prelude::*; +use std::cell::Cell; + +fn main() { + let c = Cell::new(42_i32); + (0_i32..1024).into_par_iter() + .map(|_| c.get()) //~ ERROR E0277 + .min(); +} + +``` */ diff --git a/src/compile_fail/mod.rs b/src/compile_fail/mod.rs new file mode 100644 index 0000000..13209a4 --- /dev/null +++ b/src/compile_fail/mod.rs @@ -0,0 +1,7 @@ +// These modules contain `compile_fail` doc tests. +mod cannot_collect_filtermap_data; +mod cannot_zip_filtered_data; +mod cell_par_iter; +mod must_use; +mod no_send_par_iter; +mod rc_par_iter; diff --git a/src/compile_fail/must_use.rs b/src/compile_fail/must_use.rs new file mode 100644 index 0000000..ac50a62 --- /dev/null +++ b/src/compile_fail/must_use.rs @@ -0,0 +1,67 @@ +// Check that we are flagged for ignoring `must_use` parallel adaptors. +// (unfortunately there's no error code for `unused_must_use`) + +macro_rules! must_use { + ($( $name:ident #[$expr:meta] )*) => {$( + /// First sanity check that the expression is OK. + /// + /// ``` + /// #![deny(unused_must_use)] + /// + /// use rayon::prelude::*; + /// + /// let v: Vec<_> = (0..100).map(Some).collect(); + /// let _ = + #[$expr] + /// ``` + /// + /// Now trigger the `must_use`. + /// + /// ```compile_fail + /// #![deny(unused_must_use)] + /// + /// use rayon::prelude::*; + /// + /// let v: Vec<_> = (0..100).map(Some).collect(); + #[$expr] + /// ``` + mod $name {} + )*} +} + +must_use! 
{ + step_by /** v.par_iter().step_by(2); */ + chain /** v.par_iter().chain(&v); */ + chunks /** v.par_iter().chunks(2); */ + cloned /** v.par_iter().cloned(); */ + copied /** v.par_iter().copied(); */ + enumerate /** v.par_iter().enumerate(); */ + filter /** v.par_iter().filter(|_| true); */ + filter_map /** v.par_iter().filter_map(|x| *x); */ + flat_map /** v.par_iter().flat_map(|x| *x); */ + flat_map_iter /** v.par_iter().flat_map_iter(|x| *x); */ + flatten /** v.par_iter().flatten(); */ + flatten_iter /** v.par_iter().flatten_iter(); */ + fold /** v.par_iter().fold(|| 0, |x, _| x); */ + fold_with /** v.par_iter().fold_with(0, |x, _| x); */ + try_fold /** v.par_iter().try_fold(|| 0, |x, _| Some(x)); */ + try_fold_with /** v.par_iter().try_fold_with(0, |x, _| Some(x)); */ + inspect /** v.par_iter().inspect(|_| {}); */ + interleave /** v.par_iter().interleave(&v); */ + interleave_shortest /** v.par_iter().interleave_shortest(&v); */ + intersperse /** v.par_iter().intersperse(&None); */ + map /** v.par_iter().map(|x| x); */ + map_with /** v.par_iter().map_with(0, |_, x| x); */ + map_init /** v.par_iter().map_init(|| 0, |_, x| x); */ + panic_fuse /** v.par_iter().panic_fuse(); */ + positions /** v.par_iter().positions(|_| true); */ + rev /** v.par_iter().rev(); */ + skip /** v.par_iter().skip(1); */ + take /** v.par_iter().take(1); */ + update /** v.par_iter().update(|_| {}); */ + while_some /** v.par_iter().cloned().while_some(); */ + with_max_len /** v.par_iter().with_max_len(1); */ + with_min_len /** v.par_iter().with_min_len(1); */ + zip /** v.par_iter().zip(&v); */ + zip_eq /** v.par_iter().zip_eq(&v); */ +} diff --git a/src/compile_fail/no_send_par_iter.rs b/src/compile_fail/no_send_par_iter.rs new file mode 100644 index 0000000..1362c98 --- /dev/null +++ b/src/compile_fail/no_send_par_iter.rs @@ -0,0 +1,64 @@ +// Check that `!Send` types fail early. 
+ +/** ```compile_fail,E0277 + +use rayon::prelude::*; +use std::ptr::null; + +#[derive(Copy, Clone)] +struct NoSend(*const ()); + +unsafe impl Sync for NoSend {} + +fn main() { + let x = Some(NoSend(null())); + + x.par_iter() + .map(|&x| x) //~ ERROR + .count(); //~ ERROR +} + +``` */ +mod map {} + +/** ```compile_fail,E0277 + +use rayon::prelude::*; +use std::ptr::null; + +#[derive(Copy, Clone)] +struct NoSend(*const ()); + +unsafe impl Sync for NoSend {} + +fn main() { + let x = Some(NoSend(null())); + + x.par_iter() + .filter_map(|&x| Some(x)) //~ ERROR + .count(); //~ ERROR +} + +``` */ +mod filter_map {} + +/** ```compile_fail,E0277 + +use rayon::prelude::*; +use std::ptr::null; + +#[derive(Copy, Clone)] +struct NoSend(*const ()); + +unsafe impl Sync for NoSend {} + +fn main() { + let x = Some(NoSend(null())); + + x.par_iter() + .cloned() //~ ERROR + .count(); //~ ERROR +} + +``` */ +mod cloned {} diff --git a/src/compile_fail/rc_par_iter.rs b/src/compile_fail/rc_par_iter.rs new file mode 100644 index 0000000..feaedb3 --- /dev/null +++ b/src/compile_fail/rc_par_iter.rs @@ -0,0 +1,17 @@ +/*! ```compile_fail,E0599 + +// Check that we can't use the par-iter API to access contents of an +// `Rc`. + +use rayon::prelude::*; +use std::rc::Rc; + +fn main() { + let x = vec![Rc::new(22), Rc::new(23)]; + let mut y = vec![]; + x.into_par_iter() //~ ERROR no method named `into_par_iter` + .map(|rc| *rc) + .collect_into_vec(&mut y); +} + +``` */ diff --git a/src/delegate.rs b/src/delegate.rs new file mode 100644 index 0000000..a537489 --- /dev/null +++ b/src/delegate.rs @@ -0,0 +1,70 @@ +//! Macros for delegating newtype iterators to inner types. + +// Note: these place `impl` bounds at the end, as token gobbling is the only way +// I know how to consume an arbitrary list of constraints, with `$($args:tt)*`. + +/// Creates a parallel iterator implementation which simply wraps an inner type +/// and delegates all methods inward. 
The actual struct must already be +/// declared with an `inner` field. +/// +/// The implementation of `IntoParallelIterator` should be added separately. +/// +/// # Example +/// +/// ``` +/// delegate_iterator!{ +/// MyIntoIter => (T, U), +/// impl +/// } +/// ``` +macro_rules! delegate_iterator { + ($iter:ty => $item:ty , + impl $( $args:tt )* + ) => { + impl $( $args )* ParallelIterator for $iter { + type Item = $item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where C: UnindexedConsumer + { + self.inner.drive_unindexed(consumer) + } + + fn opt_len(&self) -> Option { + self.inner.opt_len() + } + } + } +} + +/// Creates an indexed parallel iterator implementation which simply wraps an +/// inner type and delegates all methods inward. The actual struct must already +/// be declared with an `inner` field. +macro_rules! delegate_indexed_iterator { + ($iter:ty => $item:ty , + impl $( $args:tt )* + ) => { + delegate_iterator!{ + $iter => $item , + impl $( $args )* + } + + impl $( $args )* IndexedParallelIterator for $iter { + fn drive(self, consumer: C) -> C::Result + where C: Consumer + { + self.inner.drive(consumer) + } + + fn len(&self) -> usize { + self.inner.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where CB: ProducerCallback + { + self.inner.with_producer(callback) + } + } + } +} diff --git a/src/iter/chain.rs b/src/iter/chain.rs new file mode 100644 index 0000000..48fce07 --- /dev/null +++ b/src/iter/chain.rs @@ -0,0 +1,268 @@ +use super::plumbing::*; +use super::*; +use rayon_core::join; +use std::cmp; +use std::iter; + +/// `Chain` is an iterator that joins `b` after `a` in one continuous iterator. 
+/// This struct is created by the [`chain()`] method on [`ParallelIterator`] +/// +/// [`chain()`]: trait.ParallelIterator.html#method.chain +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Chain +where + A: ParallelIterator, + B: ParallelIterator, +{ + a: A, + b: B, +} + +impl Chain +where + A: ParallelIterator, + B: ParallelIterator, +{ + /// Creates a new `Chain` iterator. + pub(super) fn new(a: A, b: B) -> Self { + Chain { a, b } + } +} + +impl ParallelIterator for Chain +where + A: ParallelIterator, + B: ParallelIterator, +{ + type Item = A::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let Chain { a, b } = self; + + // If we returned a value from our own `opt_len`, then the collect consumer in particular + // will balk at being treated like an actual `UnindexedConsumer`. But when we do know the + // length, we can use `Consumer::split_at` instead, and this is still harmless for other + // truly-unindexed consumers too. + let (left, right, reducer) = if let Some(len) = a.opt_len() { + consumer.split_at(len) + } else { + let reducer = consumer.to_reducer(); + (consumer.split_off_left(), consumer, reducer) + }; + + let (a, b) = join(|| a.drive_unindexed(left), || b.drive_unindexed(right)); + reducer.reduce(a, b) + } + + fn opt_len(&self) -> Option { + self.a.opt_len()?.checked_add(self.b.opt_len()?) 
+ } +} + +impl IndexedParallelIterator for Chain +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let Chain { a, b } = self; + let (left, right, reducer) = consumer.split_at(a.len()); + let (a, b) = join(|| a.drive(left), || b.drive(right)); + reducer.reduce(a, b) + } + + fn len(&self) -> usize { + self.a.len().checked_add(self.b.len()).expect("overflow") + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let a_len = self.a.len(); + return self.a.with_producer(CallbackA { + callback, + a_len, + b: self.b, + }); + + struct CallbackA { + callback: CB, + a_len: usize, + b: B, + } + + impl ProducerCallback for CallbackA + where + B: IndexedParallelIterator, + CB: ProducerCallback, + { + type Output = CB::Output; + + fn callback(self, a_producer: A) -> Self::Output + where + A: Producer, + { + self.b.with_producer(CallbackB { + callback: self.callback, + a_len: self.a_len, + a_producer, + }) + } + } + + struct CallbackB { + callback: CB, + a_len: usize, + a_producer: A, + } + + impl ProducerCallback for CallbackB + where + A: Producer, + CB: ProducerCallback, + { + type Output = CB::Output; + + fn callback(self, b_producer: B) -> Self::Output + where + B: Producer, + { + let producer = ChainProducer::new(self.a_len, self.a_producer, b_producer); + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct ChainProducer +where + A: Producer, + B: Producer, +{ + a_len: usize, + a: A, + b: B, +} + +impl ChainProducer +where + A: Producer, + B: Producer, +{ + fn new(a_len: usize, a: A, b: B) -> Self { + ChainProducer { a_len, a, b } + } +} + +impl Producer for ChainProducer +where + A: Producer, + B: Producer, +{ + type Item = A::Item; + type IntoIter = ChainSeq; + + fn into_iter(self) -> Self::IntoIter { + ChainSeq::new(self.a.into_iter(), self.b.into_iter()) 
+ } + + fn min_len(&self) -> usize { + cmp::max(self.a.min_len(), self.b.min_len()) + } + + fn max_len(&self) -> usize { + cmp::min(self.a.max_len(), self.b.max_len()) + } + + fn split_at(self, index: usize) -> (Self, Self) { + if index <= self.a_len { + let a_rem = self.a_len - index; + let (a_left, a_right) = self.a.split_at(index); + let (b_left, b_right) = self.b.split_at(0); + ( + ChainProducer::new(index, a_left, b_left), + ChainProducer::new(a_rem, a_right, b_right), + ) + } else { + let (a_left, a_right) = self.a.split_at(self.a_len); + let (b_left, b_right) = self.b.split_at(index - self.a_len); + ( + ChainProducer::new(self.a_len, a_left, b_left), + ChainProducer::new(0, a_right, b_right), + ) + } + } + + fn fold_with(self, mut folder: F) -> F + where + F: Folder, + { + folder = self.a.fold_with(folder); + if folder.full() { + folder + } else { + self.b.fold_with(folder) + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Wrapper for Chain to implement ExactSizeIterator + +struct ChainSeq { + chain: iter::Chain, +} + +impl ChainSeq { + fn new(a: A, b: B) -> ChainSeq + where + A: ExactSizeIterator, + B: ExactSizeIterator, + { + ChainSeq { chain: a.chain(b) } + } +} + +impl Iterator for ChainSeq +where + A: Iterator, + B: Iterator, +{ + type Item = A::Item; + + fn next(&mut self) -> Option { + self.chain.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.chain.size_hint() + } +} + +impl ExactSizeIterator for ChainSeq +where + A: ExactSizeIterator, + B: ExactSizeIterator, +{ +} + +impl DoubleEndedIterator for ChainSeq +where + A: DoubleEndedIterator, + B: DoubleEndedIterator, +{ + fn next_back(&mut self) -> Option { + self.chain.next_back() + } +} diff --git a/src/iter/chunks.rs b/src/iter/chunks.rs new file mode 100644 index 0000000..be5f84c --- /dev/null +++ b/src/iter/chunks.rs @@ -0,0 +1,216 @@ +use std::cmp::min; + +use super::plumbing::*; +use super::*; +use crate::math::div_round_up; + +/// 
`Chunks` is an iterator that groups elements of an underlying iterator. +/// +/// This struct is created by the [`chunks()`] method on [`IndexedParallelIterator`] +/// +/// [`chunks()`]: trait.IndexedParallelIterator.html#method.chunks +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Chunks +where + I: IndexedParallelIterator, +{ + size: usize, + i: I, +} + +impl Chunks +where + I: IndexedParallelIterator, +{ + /// Creates a new `Chunks` iterator + pub(super) fn new(i: I, size: usize) -> Self { + Chunks { i, size } + } +} + +impl ParallelIterator for Chunks +where + I: IndexedParallelIterator, +{ + type Item = Vec; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: Consumer>, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Chunks +where + I: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + div_round_up(self.i.len(), self.size) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let len = self.i.len(); + return self.i.with_producer(Callback { + size: self.size, + len, + callback, + }); + + struct Callback { + size: usize, + len: usize, + callback: CB, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback>, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + self.callback.callback(ChunkProducer { + chunk_size: self.size, + len: self.len, + base, + }) + } + } + } +} + +struct ChunkProducer

+where + P: Producer, +{ + chunk_size: usize, + len: usize, + base: P, +} + +impl

Producer for ChunkProducer

+where + P: Producer, +{ + type Item = Vec; + type IntoIter = ChunkSeq

; + + fn into_iter(self) -> Self::IntoIter { + ChunkSeq { + chunk_size: self.chunk_size, + len: self.len, + inner: if self.len > 0 { Some(self.base) } else { None }, + } + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = min(index * self.chunk_size, self.len); + let (left, right) = self.base.split_at(elem_index); + ( + ChunkProducer { + chunk_size: self.chunk_size, + len: elem_index, + base: left, + }, + ChunkProducer { + chunk_size: self.chunk_size, + len: self.len - elem_index, + base: right, + }, + ) + } + + fn min_len(&self) -> usize { + div_round_up(self.base.min_len(), self.chunk_size) + } + + fn max_len(&self) -> usize { + self.base.max_len() / self.chunk_size + } +} + +struct ChunkSeq

{ + chunk_size: usize, + len: usize, + inner: Option

, +} + +impl

Iterator for ChunkSeq

+where + P: Producer, +{ + type Item = Vec; + + fn next(&mut self) -> Option { + let producer = self.inner.take()?; + if self.len > self.chunk_size { + let (left, right) = producer.split_at(self.chunk_size); + self.inner = Some(right); + self.len -= self.chunk_size; + Some(left.into_iter().collect()) + } else { + debug_assert!(self.len > 0); + self.len = 0; + Some(producer.into_iter().collect()) + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } +} + +impl

ExactSizeIterator for ChunkSeq

+where + P: Producer, +{ + #[inline] + fn len(&self) -> usize { + div_round_up(self.len, self.chunk_size) + } +} + +impl

DoubleEndedIterator for ChunkSeq

+where + P: Producer, +{ + fn next_back(&mut self) -> Option { + let producer = self.inner.take()?; + if self.len > self.chunk_size { + let mut size = self.len % self.chunk_size; + if size == 0 { + size = self.chunk_size; + } + let (left, right) = producer.split_at(self.len - size); + self.inner = Some(left); + self.len -= size; + Some(right.into_iter().collect()) + } else { + debug_assert!(self.len > 0); + self.len = 0; + Some(producer.into_iter().collect()) + } + } +} diff --git a/src/iter/cloned.rs b/src/iter/cloned.rs new file mode 100644 index 0000000..8d5f420 --- /dev/null +++ b/src/iter/cloned.rs @@ -0,0 +1,223 @@ +use super::plumbing::*; +use super::*; + +use std::iter; + +/// `Cloned` is an iterator that clones the elements of an underlying iterator. +/// +/// This struct is created by the [`cloned()`] method on [`ParallelIterator`] +/// +/// [`cloned()`]: trait.ParallelIterator.html#method.cloned +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Cloned { + base: I, +} + +impl Cloned +where + I: ParallelIterator, +{ + /// Creates a new `Cloned` iterator. 
+ pub(super) fn new(base: I) -> Self { + Cloned { base } + } +} + +impl<'a, T, I> ParallelIterator for Cloned +where + I: ParallelIterator, + T: 'a + Clone + Send + Sync, +{ + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = ClonedConsumer::new(consumer); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl<'a, T, I> IndexedParallelIterator for Cloned +where + I: IndexedParallelIterator, + T: 'a + Clone + Send + Sync, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = ClonedConsumer::new(consumer); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { callback }); + + struct Callback { + callback: CB, + } + + impl<'a, T, CB> ProducerCallback<&'a T> for Callback + where + CB: ProducerCallback, + T: 'a + Clone + Send, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = ClonedProducer { base }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct ClonedProducer

{ + base: P, +} + +impl<'a, T, P> Producer for ClonedProducer

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = CopiedProducer { base }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct CopiedProducer

{ + base: P, +} + +impl<'a, T, P> Producer for CopiedProducer

+where + P: Producer, + T: 'a + Copy, +{ + type Item = T; + type IntoIter = iter::Copied; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().copied() + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + CopiedProducer { base: left }, + CopiedProducer { base: right }, + ) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.base.fold_with(CopiedFolder { base: folder }).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct CopiedConsumer { + base: C, +} + +impl CopiedConsumer { + fn new(base: C) -> Self { + CopiedConsumer { base } + } +} + +impl<'a, T, C> Consumer<&'a T> for CopiedConsumer +where + C: Consumer, + T: 'a + Copy, +{ + type Folder = CopiedFolder; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + CopiedConsumer::new(left), + CopiedConsumer::new(right), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + CopiedFolder { + base: self.base.into_folder(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'a, T, C> UnindexedConsumer<&'a T> for CopiedConsumer +where + C: UnindexedConsumer, + T: 'a + Copy, +{ + fn split_off_left(&self) -> Self { + CopiedConsumer::new(self.base.split_off_left()) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct CopiedFolder { + base: F, +} + +impl<'a, T, F> Folder<&'a T> for CopiedFolder +where + F: Folder, + T: 'a + Copy, +{ + type Result = F::Result; + + fn consume(self, &item: &'a T) -> Self { + CopiedFolder { + base: self.base.consume(item), + } + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + 
self.base = self.base.consume_iter(iter.into_iter().copied()); + self + } + + fn complete(self) -> F::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/empty.rs b/src/iter/empty.rs new file mode 100644 index 0000000..85a2e5f --- /dev/null +++ b/src/iter/empty.rs @@ -0,0 +1,104 @@ +use crate::iter::plumbing::*; +use crate::iter::*; + +use std::fmt; +use std::marker::PhantomData; + +/// Creates a parallel iterator that produces nothing. +/// +/// This admits no parallelism on its own, but it could be used for code that +/// deals with generic parallel iterators. +/// +/// # Examples +/// +/// ``` +/// use rayon::prelude::*; +/// use rayon::iter::empty; +/// +/// let pi = (0..1234).into_par_iter() +/// .chain(empty()) +/// .chain(1234..10_000); +/// +/// assert_eq!(pi.count(), 10_000); +/// ``` +pub fn empty() -> Empty { + Empty { + marker: PhantomData, + } +} + +/// Iterator adaptor for [the `empty()` function](fn.empty.html). 
+pub struct Empty { + marker: PhantomData, +} + +impl Clone for Empty { + fn clone(&self) -> Self { + empty() + } +} + +impl fmt::Debug for Empty { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("Empty") + } +} + +impl ParallelIterator for Empty { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.drive(consumer) + } + + fn opt_len(&self) -> Option { + Some(0) + } +} + +impl IndexedParallelIterator for Empty { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + consumer.into_folder().complete() + } + + fn len(&self) -> usize { + 0 + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(EmptyProducer(PhantomData)) + } +} + +/// Private empty producer +struct EmptyProducer(PhantomData); + +impl Producer for EmptyProducer { + type Item = T; + type IntoIter = std::iter::Empty; + + fn into_iter(self) -> Self::IntoIter { + std::iter::empty() + } + + fn split_at(self, index: usize) -> (Self, Self) { + debug_assert_eq!(index, 0); + (self, EmptyProducer(PhantomData)) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder + } +} diff --git a/src/iter/enumerate.rs b/src/iter/enumerate.rs new file mode 100644 index 0000000..980ee7c --- /dev/null +++ b/src/iter/enumerate.rs @@ -0,0 +1,133 @@ +use super::plumbing::*; +use super::*; +use std::iter; +use std::ops::Range; +use std::usize; + +/// `Enumerate` is an iterator that returns the current count along with the element. 
+/// This struct is created by the [`enumerate()`] method on [`IndexedParallelIterator`] +/// +/// [`enumerate()`]: trait.IndexedParallelIterator.html#method.enumerate +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Enumerate { + base: I, +} + +impl Enumerate +where + I: IndexedParallelIterator, +{ + /// Creates a new `Enumerate` iterator. + pub(super) fn new(base: I) -> Self { + Enumerate { base } + } +} + +impl ParallelIterator for Enumerate +where + I: IndexedParallelIterator, +{ + type Item = (usize, I::Item); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Enumerate +where + I: IndexedParallelIterator, +{ + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { callback }); + + struct Callback { + callback: CB, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback<(usize, I)>, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = EnumerateProducer { base, offset: 0 }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Producer implementation + +struct EnumerateProducer

{ + base: P, + offset: usize, +} + +impl

Producer for EnumerateProducer

+where + P: Producer, +{ + type Item = (usize, P::Item); + type IntoIter = iter::Zip, P::IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + // Enumerate only works for IndexedParallelIterators. Since those + // have a max length of usize::MAX, their max index is + // usize::MAX - 1, so the range 0..usize::MAX includes all + // possible indices. + // + // However, we should use a precise end to the range, otherwise + // reversing the iterator may have to walk back a long ways before + // `Zip::next_back` can produce anything. + let base = self.base.into_iter(); + let end = self.offset + base.len(); + (self.offset..end).zip(base) + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + EnumerateProducer { + base: left, + offset: self.offset, + }, + EnumerateProducer { + base: right, + offset: self.offset + index, + }, + ) + } +} diff --git a/src/iter/extend.rs b/src/iter/extend.rs new file mode 100644 index 0000000..fb89249 --- /dev/null +++ b/src/iter/extend.rs @@ -0,0 +1,376 @@ +use super::noop::NoopConsumer; +use super::{IntoParallelIterator, ParallelExtend, ParallelIterator}; + +use std::borrow::Cow; +use std::collections::LinkedList; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::collections::{BinaryHeap, VecDeque}; +use std::hash::{BuildHasher, Hash}; + +/// Performs a generic `par_extend` by collecting to a `LinkedList>` in +/// parallel, then extending the collection sequentially. 
+fn extend(collection: &mut C, par_iter: I, reserve: F) +where + I: IntoParallelIterator, + F: FnOnce(&mut C, &LinkedList>), + C: Extend, +{ + let list = collect(par_iter); + reserve(collection, &list); + for vec in list { + collection.extend(vec); + } +} + +pub(super) fn collect(par_iter: I) -> LinkedList> +where + I: IntoParallelIterator, +{ + par_iter + .into_par_iter() + .fold(Vec::new, vec_push) + .map(as_list) + .reduce(LinkedList::new, list_append) +} + +fn vec_push(mut vec: Vec, elem: T) -> Vec { + vec.push(elem); + vec +} + +fn as_list(item: T) -> LinkedList { + let mut list = LinkedList::new(); + list.push_back(item); + list +} + +fn list_append(mut list1: LinkedList, mut list2: LinkedList) -> LinkedList { + list1.append(&mut list2); + list1 +} + +/// Computes the total length of a `LinkedList>`. +pub(super) fn len(list: &LinkedList>) -> usize { + list.iter().map(Vec::len).sum() +} + +fn no_reserve(_: &mut C, _: &LinkedList>) {} + +fn heap_reserve(heap: &mut BinaryHeap, list: &LinkedList>) { + heap.reserve(len(list)); +} + +/// Extends a binary heap with items from a parallel iterator. +impl ParallelExtend for BinaryHeap +where + T: Ord + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, heap_reserve); + } +} + +/// Extends a binary heap with copied items from a parallel iterator. +impl<'a, T> ParallelExtend<&'a T> for BinaryHeap +where + T: 'a + Copy + Ord + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, heap_reserve); + } +} + +/// Extends a B-tree map with items from a parallel iterator. +impl ParallelExtend<(K, V)> for BTreeMap +where + K: Ord + Send, + V: Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, no_reserve); + } +} + +/// Extends a B-tree map with copied items from a parallel iterator. 
+impl<'a, K: 'a, V: 'a> ParallelExtend<(&'a K, &'a V)> for BTreeMap +where + K: Copy + Ord + Send + Sync, + V: Copy + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, no_reserve); + } +} + +/// Extends a B-tree set with items from a parallel iterator. +impl ParallelExtend for BTreeSet +where + T: Ord + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, no_reserve); + } +} + +/// Extends a B-tree set with copied items from a parallel iterator. +impl<'a, T> ParallelExtend<&'a T> for BTreeSet +where + T: 'a + Copy + Ord + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, no_reserve); + } +} + +fn map_reserve(map: &mut HashMap, list: &LinkedList>) +where + K: Eq + Hash, + S: BuildHasher, +{ + map.reserve(len(list)); +} + +/// Extends a hash map with items from a parallel iterator. +impl ParallelExtend<(K, V)> for HashMap +where + K: Eq + Hash + Send, + V: Send, + S: BuildHasher + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + // See the map_collect benchmarks in rayon-demo for different strategies. + extend(self, par_iter, map_reserve); + } +} + +/// Extends a hash map with copied items from a parallel iterator. +impl<'a, K: 'a, V: 'a, S> ParallelExtend<(&'a K, &'a V)> for HashMap +where + K: Copy + Eq + Hash + Send + Sync, + V: Copy + Send + Sync, + S: BuildHasher + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, map_reserve); + } +} + +fn set_reserve(set: &mut HashSet, list: &LinkedList>) +where + T: Eq + Hash, + S: BuildHasher, +{ + set.reserve(len(list)); +} + +/// Extends a hash set with items from a parallel iterator. 
+impl ParallelExtend for HashSet +where + T: Eq + Hash + Send, + S: BuildHasher + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, set_reserve); + } +} + +/// Extends a hash set with copied items from a parallel iterator. +impl<'a, T, S> ParallelExtend<&'a T> for HashSet +where + T: 'a + Copy + Eq + Hash + Send + Sync, + S: BuildHasher + Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, set_reserve); + } +} + +fn list_push_back(mut list: LinkedList, elem: T) -> LinkedList { + list.push_back(elem); + list +} + +/// Extends a linked list with items from a parallel iterator. +impl ParallelExtend for LinkedList +where + T: Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + let mut list = par_iter + .into_par_iter() + .fold(LinkedList::new, list_push_back) + .reduce(LinkedList::new, list_append); + self.append(&mut list); + } +} + +/// Extends a linked list with copied items from a parallel iterator. +impl<'a, T> ParallelExtend<&'a T> for LinkedList +where + T: 'a + Copy + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + self.par_extend(par_iter.into_par_iter().cloned()) + } +} + +fn string_push(mut string: String, ch: char) -> String { + string.push(ch); + string +} + +/// Extends a string with characters from a parallel iterator. +impl ParallelExtend for String { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + // This is like `extend`, but `Vec` is less efficient to deal + // with than `String`, so instead collect to `LinkedList`. + let list: LinkedList<_> = par_iter + .into_par_iter() + .fold(String::new, string_push) + .map(as_list) + .reduce(LinkedList::new, list_append); + + self.reserve(list.iter().map(String::len).sum()); + self.extend(list) + } +} + +/// Extends a string with copied characters from a parallel iterator. 
+impl<'a> ParallelExtend<&'a char> for String { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + self.par_extend(par_iter.into_par_iter().cloned()) + } +} + +fn string_reserve>(string: &mut String, list: &LinkedList>) { + let len = list.iter().flatten().map(T::as_ref).map(str::len).sum(); + string.reserve(len); +} + +/// Extends a string with string slices from a parallel iterator. +impl<'a> ParallelExtend<&'a str> for String { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, string_reserve); + } +} + +/// Extends a string with strings from a parallel iterator. +impl ParallelExtend for String { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, string_reserve); + } +} + +/// Extends a string with string slices from a parallel iterator. +impl<'a> ParallelExtend> for String { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator>, + { + extend(self, par_iter, string_reserve); + } +} + +fn deque_reserve(deque: &mut VecDeque, list: &LinkedList>) { + deque.reserve(len(list)); +} + +/// Extends a deque with items from a parallel iterator. +impl ParallelExtend for VecDeque +where + T: Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, deque_reserve); + } +} + +/// Extends a deque with copied items from a parallel iterator. +impl<'a, T> ParallelExtend<&'a T> for VecDeque +where + T: 'a + Copy + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + extend(self, par_iter, deque_reserve); + } +} + +// See the `collect` module for the `Vec` implementation. +// impl ParallelExtend for Vec + +/// Extends a vector with copied items from a parallel iterator. 
+impl<'a, T> ParallelExtend<&'a T> for Vec +where + T: 'a + Copy + Send + Sync, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + self.par_extend(par_iter.into_par_iter().cloned()) + } +} + +/// Collapses all unit items from a parallel iterator into one. +impl ParallelExtend<()> for () { + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + par_iter.into_par_iter().drive_unindexed(NoopConsumer) + } +} diff --git a/src/iter/filter.rs b/src/iter/filter.rs new file mode 100644 index 0000000..38627f7 --- /dev/null +++ b/src/iter/filter.rs @@ -0,0 +1,141 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `Filter` takes a predicate `filter_op` and keeps only the elements for which it returns `true`. +/// This struct is created by the [`filter()`] method on [`ParallelIterator`] +/// +/// [`filter()`]: trait.ParallelIterator.html#method.filter +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Filter { + base: I, + filter_op: P, +} + +impl Debug for Filter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Filter").field("base", &self.base).finish() + } +} + +impl Filter +where + I: ParallelIterator, +{ + /// Creates a new `Filter` iterator. 
+ pub(super) fn new(base: I, filter_op: P) -> Self { + Filter { base, filter_op } + } +} + +impl ParallelIterator for Filter +where + I: ParallelIterator, + P: Fn(&I::Item) -> bool + Sync + Send, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = FilterConsumer::new(consumer, &self.filter_op); + self.base.drive_unindexed(consumer1) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FilterConsumer<'p, C, P> { + base: C, + filter_op: &'p P, +} + +impl<'p, C, P> FilterConsumer<'p, C, P> { + fn new(base: C, filter_op: &'p P) -> Self { + FilterConsumer { base, filter_op } + } +} + +impl<'p, T, C, P: 'p> Consumer for FilterConsumer<'p, C, P> +where + C: Consumer, + P: Fn(&T) -> bool + Sync, +{ + type Folder = FilterFolder<'p, C::Folder, P>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FilterConsumer::new(left, self.filter_op), + FilterConsumer::new(right, self.filter_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FilterFolder { + base: self.base.into_folder(), + filter_op: self.filter_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'p, T, C, P: 'p> UnindexedConsumer for FilterConsumer<'p, C, P> +where + C: UnindexedConsumer, + P: Fn(&T) -> bool + Sync, +{ + fn split_off_left(&self) -> Self { + FilterConsumer::new(self.base.split_off_left(), &self.filter_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FilterFolder<'p, C, P> { + base: C, + filter_op: &'p P, +} + +impl<'p, C, P, T> Folder for FilterFolder<'p, C, P> +where + C: Folder, + P: Fn(&T) -> bool + 'p, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let filter_op = self.filter_op; + if filter_op(&item) { 
+ let base = self.base.consume(item); + FilterFolder { base, filter_op } + } else { + self + } + } + + // This cannot easily specialize `consume_iter` to be better than + // the default, because that requires checking `self.base.full()` + // during a call to `self.base.consume_iter()`. (#632) + + fn complete(self) -> Self::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/filter_map.rs b/src/iter/filter_map.rs new file mode 100644 index 0000000..f19c385 --- /dev/null +++ b/src/iter/filter_map.rs @@ -0,0 +1,142 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `FilterMap` creates an iterator that uses `filter_op` to both filter and map elements. +/// This struct is created by the [`filter_map()`] method on [`ParallelIterator`]. +/// +/// [`filter_map()`]: trait.ParallelIterator.html#method.filter_map +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct FilterMap { + base: I, + filter_op: P, +} + +impl Debug for FilterMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FilterMap") + .field("base", &self.base) + .finish() + } +} + +impl FilterMap { + /// Creates a new `FilterMap` iterator. 
+ pub(super) fn new(base: I, filter_op: P) -> Self { + FilterMap { base, filter_op } + } +} + +impl ParallelIterator for FilterMap +where + I: ParallelIterator, + P: Fn(I::Item) -> Option + Sync + Send, + R: Send, +{ + type Item = R; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer = FilterMapConsumer::new(consumer, &self.filter_op); + self.base.drive_unindexed(consumer) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FilterMapConsumer<'p, C, P> { + base: C, + filter_op: &'p P, +} + +impl<'p, C, P: 'p> FilterMapConsumer<'p, C, P> { + fn new(base: C, filter_op: &'p P) -> Self { + FilterMapConsumer { base, filter_op } + } +} + +impl<'p, T, U, C, P> Consumer for FilterMapConsumer<'p, C, P> +where + C: Consumer, + P: Fn(T) -> Option + Sync + 'p, +{ + type Folder = FilterMapFolder<'p, C::Folder, P>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FilterMapConsumer::new(left, self.filter_op), + FilterMapConsumer::new(right, self.filter_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + let base = self.base.into_folder(); + FilterMapFolder { + base, + filter_op: self.filter_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'p, T, U, C, P> UnindexedConsumer for FilterMapConsumer<'p, C, P> +where + C: UnindexedConsumer, + P: Fn(T) -> Option + Sync + 'p, +{ + fn split_off_left(&self) -> Self { + FilterMapConsumer::new(self.base.split_off_left(), &self.filter_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FilterMapFolder<'p, C, P> { + base: C, + filter_op: &'p P, +} + +impl<'p, T, U, C, P> Folder for FilterMapFolder<'p, C, P> +where + C: Folder, + P: Fn(T) -> Option + Sync + 'p, +{ + type Result = C::Result; + + fn 
consume(self, item: T) -> Self { + let filter_op = self.filter_op; + if let Some(mapped_item) = filter_op(item) { + let base = self.base.consume(mapped_item); + FilterMapFolder { base, filter_op } + } else { + self + } + } + + // This cannot easily specialize `consume_iter` to be better than + // the default, because that requires checking `self.base.full()` + // during a call to `self.base.consume_iter()`. (#632) + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/find.rs b/src/iter/find.rs new file mode 100644 index 0000000..971db2b --- /dev/null +++ b/src/iter/find.rs @@ -0,0 +1,120 @@ +use super::plumbing::*; +use super::*; +use std::sync::atomic::{AtomicBool, Ordering}; + +pub(super) fn find(pi: I, find_op: P) -> Option +where + I: ParallelIterator, + P: Fn(&I::Item) -> bool + Sync, +{ + let found = AtomicBool::new(false); + let consumer = FindConsumer::new(&find_op, &found); + pi.drive_unindexed(consumer) +} + +struct FindConsumer<'p, P> { + find_op: &'p P, + found: &'p AtomicBool, +} + +impl<'p, P> FindConsumer<'p, P> { + fn new(find_op: &'p P, found: &'p AtomicBool) -> Self { + FindConsumer { find_op, found } + } +} + +impl<'p, T, P: 'p> Consumer for FindConsumer<'p, P> +where + T: Send, + P: Fn(&T) -> bool + Sync, +{ + type Folder = FindFolder<'p, T, P>; + type Reducer = FindReducer; + type Result = Option; + + fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) { + (self.split_off_left(), self, FindReducer) + } + + fn into_folder(self) -> Self::Folder { + FindFolder { + find_op: self.find_op, + found: self.found, + item: None, + } + } + + fn full(&self) -> bool { + self.found.load(Ordering::Relaxed) + } +} + +impl<'p, T, P: 'p> UnindexedConsumer for FindConsumer<'p, P> +where + T: Send, + P: Fn(&T) -> bool + Sync, +{ + fn split_off_left(&self) -> Self { + FindConsumer::new(self.find_op, self.found) + } + + fn to_reducer(&self) -> Self::Reducer { + 
FindReducer + } +} + +struct FindFolder<'p, T, P> { + find_op: &'p P, + found: &'p AtomicBool, + item: Option, +} + +impl<'p, T, P> Folder for FindFolder<'p, T, P> +where + P: Fn(&T) -> bool + 'p, +{ + type Result = Option; + + fn consume(mut self, item: T) -> Self { + if (self.find_op)(&item) { + self.found.store(true, Ordering::Relaxed); + self.item = Some(item); + } + self + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + fn not_full(found: &AtomicBool) -> impl Fn(&T) -> bool + '_ { + move |_| !found.load(Ordering::Relaxed) + } + + self.item = iter + .into_iter() + // stop iterating if another thread has found something + .take_while(not_full(&self.found)) + .find(self.find_op); + if self.item.is_some() { + self.found.store(true, Ordering::Relaxed) + } + self + } + + fn complete(self) -> Self::Result { + self.item + } + + fn full(&self) -> bool { + self.found.load(Ordering::Relaxed) + } +} + +struct FindReducer; + +impl Reducer> for FindReducer { + fn reduce(self, left: Option, right: Option) -> Option { + left.or(right) + } +} diff --git a/src/iter/find_first_last/mod.rs b/src/iter/find_first_last/mod.rs new file mode 100644 index 0000000..e5da8f0 --- /dev/null +++ b/src/iter/find_first_last/mod.rs @@ -0,0 +1,238 @@ +use super::plumbing::*; +use super::*; +use std::cell::Cell; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[cfg(test)] +mod test; + +// The key optimization for find_first is that a consumer can stop its search if +// some consumer to its left already found a match (and similarly for consumers +// to the right for find_last). To make this work, all consumers need some +// notion of their position in the data relative to other consumers, including +// unindexed consumers that have no built-in notion of position. +// +// To solve this, we assign each consumer a lower and upper bound for an +// imaginary "range" of data that it consumes. The initial consumer starts with +// the range 0..usize::max_value(). 
The split divides this range in half so that +// one resulting consumer has the range 0..(usize::max_value() / 2), and the +// other has (usize::max_value() / 2)..usize::max_value(). Every subsequent +// split divides the range in half again until it cannot be split anymore +// (i.e. its length is 1), in which case the split returns two consumers with +// the same range. In that case both consumers will continue to consume all +// their data regardless of whether a better match is found, but the reducer +// will still return the correct answer. + +#[derive(Copy, Clone)] +enum MatchPosition { + Leftmost, + Rightmost, +} + +/// Returns true if pos1 is a better match than pos2 according to MatchPosition +#[inline] +fn better_position(pos1: usize, pos2: usize, mp: MatchPosition) -> bool { + match mp { + MatchPosition::Leftmost => pos1 < pos2, + MatchPosition::Rightmost => pos1 > pos2, + } +} + +pub(super) fn find_first(pi: I, find_op: P) -> Option +where + I: ParallelIterator, + P: Fn(&I::Item) -> bool + Sync, +{ + let best_found = AtomicUsize::new(usize::max_value()); + let consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &best_found); + pi.drive_unindexed(consumer) +} + +pub(super) fn find_last(pi: I, find_op: P) -> Option +where + I: ParallelIterator, + P: Fn(&I::Item) -> bool + Sync, +{ + let best_found = AtomicUsize::new(0); + let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &best_found); + pi.drive_unindexed(consumer) +} + +struct FindConsumer<'p, P> { + find_op: &'p P, + lower_bound: Cell, + upper_bound: usize, + match_position: MatchPosition, + best_found: &'p AtomicUsize, +} + +impl<'p, P> FindConsumer<'p, P> { + fn new(find_op: &'p P, match_position: MatchPosition, best_found: &'p AtomicUsize) -> Self { + FindConsumer { + find_op, + lower_bound: Cell::new(0), + upper_bound: usize::max_value(), + match_position, + best_found, + } + } + + fn current_index(&self) -> usize { + match self.match_position { + 
MatchPosition::Leftmost => self.lower_bound.get(), + MatchPosition::Rightmost => self.upper_bound, + } + } +} + +impl<'p, T, P> Consumer for FindConsumer<'p, P> +where + T: Send, + P: Fn(&T) -> bool + Sync, +{ + type Folder = FindFolder<'p, T, P>; + type Reducer = FindReducer; + type Result = Option; + + fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) { + let dir = self.match_position; + ( + self.split_off_left(), + self, + FindReducer { + match_position: dir, + }, + ) + } + + fn into_folder(self) -> Self::Folder { + FindFolder { + find_op: self.find_op, + boundary: self.current_index(), + match_position: self.match_position, + best_found: self.best_found, + item: None, + } + } + + fn full(&self) -> bool { + // can stop consuming if the best found index so far is *strictly* + // better than anything this consumer will find + better_position( + self.best_found.load(Ordering::Relaxed), + self.current_index(), + self.match_position, + ) + } +} + +impl<'p, T, P> UnindexedConsumer for FindConsumer<'p, P> +where + T: Send, + P: Fn(&T) -> bool + Sync, +{ + fn split_off_left(&self) -> Self { + // Upper bound for one consumer will be lower bound for the other. This + // overlap is okay, because only one of the bounds will be used for + // comparing against best_found; the other is kept only to be able to + // divide the range in half. + // + // When the resolution of usize has been exhausted (i.e. when + // upper_bound = lower_bound), both results of this split will have the + // same range. When that happens, we lose the ability to tell one + // consumer to stop working when the other finds a better match, but the + // reducer ensures that the best answer is still returned (see the tests + // in `test.rs`). 
+ let old_lower_bound = self.lower_bound.get(); + let median = old_lower_bound + ((self.upper_bound - old_lower_bound) / 2); + self.lower_bound.set(median); + + FindConsumer { + find_op: self.find_op, + lower_bound: Cell::new(old_lower_bound), + upper_bound: median, + match_position: self.match_position, + best_found: self.best_found, + } + } + + fn to_reducer(&self) -> Self::Reducer { + FindReducer { + match_position: self.match_position, + } + } +} + +struct FindFolder<'p, T, P> { + find_op: &'p P, + boundary: usize, + match_position: MatchPosition, + best_found: &'p AtomicUsize, + item: Option, +} + +impl<'p, P: 'p + Fn(&T) -> bool, T> Folder for FindFolder<'p, T, P> { + type Result = Option; + + fn consume(mut self, item: T) -> Self { + let found_best_in_range = match self.match_position { + MatchPosition::Leftmost => self.item.is_some(), + MatchPosition::Rightmost => false, + }; + + if !found_best_in_range && (self.find_op)(&item) { + // Continuously try to set best_found until we succeed or we + // discover a better match was already found. 
+ let mut current = self.best_found.load(Ordering::Relaxed); + loop { + if better_position(current, self.boundary, self.match_position) { + break; + } + match self.best_found.compare_exchange_weak( + current, + self.boundary, + Ordering::Relaxed, + Ordering::Relaxed, + ) { + Ok(_) => { + self.item = Some(item); + break; + } + Err(v) => current = v, + } + } + } + self + } + + fn complete(self) -> Self::Result { + self.item + } + + fn full(&self) -> bool { + let found_best_in_range = match self.match_position { + MatchPosition::Leftmost => self.item.is_some(), + MatchPosition::Rightmost => false, + }; + + found_best_in_range + || better_position( + self.best_found.load(Ordering::Relaxed), + self.boundary, + self.match_position, + ) + } +} + +struct FindReducer { + match_position: MatchPosition, +} + +impl Reducer> for FindReducer { + fn reduce(self, left: Option, right: Option) -> Option { + match self.match_position { + MatchPosition::Leftmost => left.or(right), + MatchPosition::Rightmost => right.or(left), + } + } +} diff --git a/src/iter/find_first_last/test.rs b/src/iter/find_first_last/test.rs new file mode 100644 index 0000000..05271bc --- /dev/null +++ b/src/iter/find_first_last/test.rs @@ -0,0 +1,106 @@ +use super::*; +use std::sync::atomic::AtomicUsize; + +#[test] +fn same_range_first_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; + let first_found = AtomicUsize::new(usize::max_value()); + let far_right_consumer = FindConsumer::new(&find_op, MatchPosition::Leftmost, &first_found); + + // We save a consumer that will be far to the right of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let consumer = far_right_consumer.split_off_left(); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); + + for _ in 0..bits_in_usize { + consumer.split_off_left(); + } + + let reducer = consumer.to_reducer(); + // the left and right 
folders should now have the same range, having + // exhausted the resolution of usize + let left_folder = consumer.split_off_left().into_folder(); + let right_folder = consumer.into_folder(); + + let left_folder = left_folder.consume(0).consume(1); + assert_eq!(left_folder.boundary, right_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!right_folder.full()); + assert!(far_right_consumer.full()); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!( + reducer.reduce(left_folder.complete(), right_folder.complete()), + Some(0) + ); +} + +#[test] +fn same_range_last_consumers_return_correct_answer() { + let find_op = |x: &i32| x % 2 == 0; + let last_found = AtomicUsize::new(0); + let consumer = FindConsumer::new(&find_op, MatchPosition::Rightmost, &last_found); + + // We save a consumer that will be far to the left of the main consumer (and therefore not + // sharing an index range with that consumer) for fullness testing + let far_left_consumer = consumer.split_off_left(); + + // split until we have an indivisible range + let bits_in_usize = usize::min_value().count_zeros(); + for _ in 0..bits_in_usize { + consumer.split_off_left(); + } + + let reducer = consumer.to_reducer(); + // due to the exact calculation in split_off_left, the very last consumer has a + // range of width 2, so we use the second-to-last consumer instead to get + // the same boundary on both folders + let consumer = consumer.split_off_left(); + let left_folder = consumer.split_off_left().into_folder(); + let right_folder = consumer.into_folder(); + let right_folder = right_folder.consume(2).consume(3); + assert_eq!(left_folder.boundary, right_folder.boundary); + // expect not full even though a better match has been found because the + // ranges are the same + assert!(!left_folder.full()); + assert!(far_left_consumer.full()); + let left_folder = left_folder.consume(0).consume(1); + assert_eq!( + 
reducer.reduce(left_folder.complete(), right_folder.complete()), + Some(2) + ); +} + +// These tests require that a folder be assigned to an iterator with more than +// one element. We can't necessarily determine when that will happen for a given +// input to find_first/find_last, so we test the folder directly here instead. +#[test] +fn find_first_folder_does_not_clobber_first_found() { + let best_found = AtomicUsize::new(usize::max_value()); + let f = FindFolder { + find_op: &(|&_: &i32| -> bool { true }), + boundary: 0, + match_position: MatchPosition::Leftmost, + best_found: &best_found, + item: None, + }; + let f = f.consume(0_i32).consume(1_i32).consume(2_i32); + assert!(f.full()); + assert_eq!(f.complete(), Some(0_i32)); +} + +#[test] +fn find_last_folder_yields_last_match() { + let best_found = AtomicUsize::new(0); + let f = FindFolder { + find_op: &(|&_: &i32| -> bool { true }), + boundary: 0, + match_position: MatchPosition::Rightmost, + best_found: &best_found, + item: None, + }; + let f = f.consume(0_i32).consume(1_i32).consume(2_i32); + assert_eq!(f.complete(), Some(2_i32)); +} diff --git a/src/iter/flat_map.rs b/src/iter/flat_map.rs new file mode 100644 index 0000000..f264e1e --- /dev/null +++ b/src/iter/flat_map.rs @@ -0,0 +1,154 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `FlatMap` maps each element to a parallel iterator, then flattens these iterators together. 
+/// This struct is created by the [`flat_map()`] method on [`ParallelIterator`] +/// +/// [`flat_map()`]: trait.ParallelIterator.html#method.flat_map +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct FlatMap { + base: I, + map_op: F, +} + +impl Debug for FlatMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FlatMap").field("base", &self.base).finish() + } +} + +impl FlatMap { + /// Creates a new `FlatMap` iterator. + pub(super) fn new(base: I, map_op: F) -> Self { + FlatMap { base, map_op } + } +} + +impl ParallelIterator for FlatMap +where + I: ParallelIterator, + F: Fn(I::Item) -> PI + Sync + Send, + PI: IntoParallelIterator, +{ + type Item = PI::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer = FlatMapConsumer::new(consumer, &self.map_op); + self.base.drive_unindexed(consumer) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FlatMapConsumer<'f, C, F> { + base: C, + map_op: &'f F, +} + +impl<'f, C, F> FlatMapConsumer<'f, C, F> { + fn new(base: C, map_op: &'f F) -> Self { + FlatMapConsumer { base, map_op } + } +} + +impl<'f, T, U, C, F> Consumer for FlatMapConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(T) -> U + Sync, + U: IntoParallelIterator, +{ + type Folder = FlatMapFolder<'f, C, F, C::Result>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FlatMapConsumer::new(left, self.map_op), + FlatMapConsumer::new(right, self.map_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FlatMapFolder { + base: self.base, + map_op: self.map_op, + previous: None, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + 
+impl<'f, T, U, C, F> UnindexedConsumer for FlatMapConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(T) -> U + Sync, + U: IntoParallelIterator, +{ + fn split_off_left(&self) -> Self { + FlatMapConsumer::new(self.base.split_off_left(), self.map_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FlatMapFolder<'f, C, F, R> { + base: C, + map_op: &'f F, + previous: Option, +} + +impl<'f, T, U, C, F> Folder for FlatMapFolder<'f, C, F, C::Result> +where + C: UnindexedConsumer, + F: Fn(T) -> U + Sync, + U: IntoParallelIterator, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let map_op = self.map_op; + let par_iter = map_op(item).into_par_iter(); + let consumer = self.base.split_off_left(); + let result = par_iter.drive_unindexed(consumer); + + let previous = match self.previous { + None => Some(result), + Some(previous) => { + let reducer = self.base.to_reducer(); + Some(reducer.reduce(previous, result)) + } + }; + + FlatMapFolder { + base: self.base, + map_op, + previous, + } + } + + fn complete(self) -> Self::Result { + match self.previous { + Some(previous) => previous, + None => self.base.into_folder().complete(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/flat_map_iter.rs b/src/iter/flat_map_iter.rs new file mode 100644 index 0000000..c76cf68 --- /dev/null +++ b/src/iter/flat_map_iter.rs @@ -0,0 +1,147 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `FlatMapIter` maps each element to a serial iterator, then flattens these iterators together. 
+/// This struct is created by the [`flat_map_iter()`] method on [`ParallelIterator`] +/// +/// [`flat_map_iter()`]: trait.ParallelIterator.html#method.flat_map_iter +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct FlatMapIter { + base: I, + map_op: F, +} + +impl Debug for FlatMapIter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FlatMapIter") + .field("base", &self.base) + .finish() + } +} + +impl FlatMapIter { + /// Creates a new `FlatMapIter` iterator. + pub(super) fn new(base: I, map_op: F) -> Self { + FlatMapIter { base, map_op } + } +} + +impl ParallelIterator for FlatMapIter +where + I: ParallelIterator, + F: Fn(I::Item) -> SI + Sync + Send, + SI: IntoIterator, + SI::Item: Send, +{ + type Item = SI::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer = FlatMapIterConsumer::new(consumer, &self.map_op); + self.base.drive_unindexed(consumer) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FlatMapIterConsumer<'f, C, F> { + base: C, + map_op: &'f F, +} + +impl<'f, C, F> FlatMapIterConsumer<'f, C, F> { + fn new(base: C, map_op: &'f F) -> Self { + FlatMapIterConsumer { base, map_op } + } +} + +impl<'f, T, U, C, F> Consumer for FlatMapIterConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(T) -> U + Sync, + U: IntoIterator, +{ + type Folder = FlatMapIterFolder<'f, C::Folder, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FlatMapIterConsumer::new(left, self.map_op), + FlatMapIterConsumer::new(right, self.map_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FlatMapIterFolder { + base: self.base.into_folder(), + map_op: 
self.map_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'f, T, U, C, F> UnindexedConsumer for FlatMapIterConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(T) -> U + Sync, + U: IntoIterator, +{ + fn split_off_left(&self) -> Self { + FlatMapIterConsumer::new(self.base.split_off_left(), self.map_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FlatMapIterFolder<'f, C, F> { + base: C, + map_op: &'f F, +} + +impl<'f, T, U, C, F> Folder for FlatMapIterFolder<'f, C, F> +where + C: Folder, + F: Fn(T) -> U, + U: IntoIterator, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let map_op = self.map_op; + let base = self.base.consume_iter(map_op(item)); + FlatMapIterFolder { base, map_op } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + let map_op = self.map_op; + let iter = iter.into_iter().flat_map(map_op); + let base = self.base.consume_iter(iter); + FlatMapIterFolder { base, map_op } + } + + fn complete(self) -> Self::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/flatten.rs b/src/iter/flatten.rs new file mode 100644 index 0000000..29d88f9 --- /dev/null +++ b/src/iter/flatten.rs @@ -0,0 +1,140 @@ +use super::plumbing::*; +use super::*; + +/// `Flatten` turns each element into a parallel iterator, then flattens these iterators +/// together. This struct is created by the [`flatten()`] method on [`ParallelIterator`]. +/// +/// [`flatten()`]: trait.ParallelIterator.html#method.flatten +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Flatten { + base: I, +} + +impl Flatten +where + I: ParallelIterator, + I::Item: IntoParallelIterator, +{ + /// Creates a new `Flatten` iterator. 
+ pub(super) fn new(base: I) -> Self { + Flatten { base } + } +} + +impl ParallelIterator for Flatten +where + I: ParallelIterator, + I::Item: IntoParallelIterator, +{ + type Item = ::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer = FlattenConsumer::new(consumer); + self.base.drive_unindexed(consumer) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FlattenConsumer { + base: C, +} + +impl FlattenConsumer { + fn new(base: C) -> Self { + FlattenConsumer { base } + } +} + +impl Consumer for FlattenConsumer +where + C: UnindexedConsumer, + T: IntoParallelIterator, +{ + type Folder = FlattenFolder; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FlattenConsumer::new(left), + FlattenConsumer::new(right), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FlattenFolder { + base: self.base, + previous: None, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl UnindexedConsumer for FlattenConsumer +where + C: UnindexedConsumer, + T: IntoParallelIterator, +{ + fn split_off_left(&self) -> Self { + FlattenConsumer::new(self.base.split_off_left()) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FlattenFolder { + base: C, + previous: Option, +} + +impl Folder for FlattenFolder +where + C: UnindexedConsumer, + T: IntoParallelIterator, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let par_iter = item.into_par_iter(); + let consumer = self.base.split_off_left(); + let result = par_iter.drive_unindexed(consumer); + + let previous = match self.previous { + None => Some(result), + Some(previous) => { + let reducer = self.base.to_reducer(); + Some(reducer.reduce(previous, result)) + } + }; + + FlattenFolder { + 
base: self.base, + previous, + } + } + + fn complete(self) -> Self::Result { + match self.previous { + Some(previous) => previous, + None => self.base.into_folder().complete(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/flatten_iter.rs b/src/iter/flatten_iter.rs new file mode 100644 index 0000000..3ce0a3c --- /dev/null +++ b/src/iter/flatten_iter.rs @@ -0,0 +1,132 @@ +use super::plumbing::*; +use super::*; + +/// `FlattenIter` turns each element into a serial iterator, then flattens these iterators +/// together. This struct is created by the [`flatten_iter()`] method on [`ParallelIterator`]. +/// +/// [`flatten_iter()`]: trait.ParallelIterator.html#method.flatten_iter +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct FlattenIter { + base: I, +} + +impl FlattenIter +where + I: ParallelIterator, + I::Item: IntoIterator, + ::Item: Send, +{ + /// Creates a new `FlattenIter` iterator. 
+ pub(super) fn new(base: I) -> Self { + FlattenIter { base } + } +} + +impl ParallelIterator for FlattenIter +where + I: ParallelIterator, + I::Item: IntoIterator, + ::Item: Send, +{ + type Item = ::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer = FlattenIterConsumer::new(consumer); + self.base.drive_unindexed(consumer) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct FlattenIterConsumer { + base: C, +} + +impl FlattenIterConsumer { + fn new(base: C) -> Self { + FlattenIterConsumer { base } + } +} + +impl Consumer for FlattenIterConsumer +where + C: UnindexedConsumer, + T: IntoIterator, +{ + type Folder = FlattenIterFolder; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FlattenIterConsumer::new(left), + FlattenIterConsumer::new(right), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FlattenIterFolder { + base: self.base.into_folder(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl UnindexedConsumer for FlattenIterConsumer +where + C: UnindexedConsumer, + T: IntoIterator, +{ + fn split_off_left(&self) -> Self { + FlattenIterConsumer::new(self.base.split_off_left()) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FlattenIterFolder { + base: C, +} + +impl Folder for FlattenIterFolder +where + C: Folder, + T: IntoIterator, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let base = self.base.consume_iter(item); + FlattenIterFolder { base } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + let iter = iter.into_iter().flatten(); + let base = self.base.consume_iter(iter); + FlattenIterFolder { base } + } + + fn complete(self) -> Self::Result { + 
self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/fold.rs b/src/iter/fold.rs new file mode 100644 index 0000000..345afbd --- /dev/null +++ b/src/iter/fold.rs @@ -0,0 +1,302 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +impl Fold +where + I: ParallelIterator, + F: Fn(U, I::Item) -> U + Sync + Send, + ID: Fn() -> U + Sync + Send, + U: Send, +{ + pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self { + Fold { + base, + identity, + fold_op, + } + } +} + +/// `Fold` is an iterator that applies a function over an iterator producing a single value. +/// This struct is created by the [`fold()`] method on [`ParallelIterator`] +/// +/// [`fold()`]: trait.ParallelIterator.html#method.fold +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Fold { + base: I, + identity: ID, + fold_op: F, +} + +impl Debug for Fold { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Fold").field("base", &self.base).finish() + } +} + +impl ParallelIterator for Fold +where + I: ParallelIterator, + F: Fn(U, I::Item) -> U + Sync + Send, + ID: Fn() -> U + Sync + Send, + U: Send, +{ + type Item = U; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = FoldConsumer { + base: consumer, + fold_op: &self.fold_op, + identity: &self.identity, + }; + self.base.drive_unindexed(consumer1) + } +} + +struct FoldConsumer<'c, C, ID, F> { + base: C, + fold_op: &'c F, + identity: &'c ID, +} + +impl<'r, U, T, C, ID, F> Consumer for FoldConsumer<'r, C, ID, F> +where + C: Consumer, + F: Fn(U, T) -> U + Sync, + ID: Fn() -> U + Sync, + U: Send, +{ + type Folder = FoldFolder<'r, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, 
reducer) = self.base.split_at(index); + ( + FoldConsumer { base: left, ..self }, + FoldConsumer { + base: right, + ..self + }, + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FoldFolder { + base: self.base.into_folder(), + item: (self.identity)(), + fold_op: self.fold_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'r, U, T, C, ID, F> UnindexedConsumer for FoldConsumer<'r, C, ID, F> +where + C: UnindexedConsumer, + F: Fn(U, T) -> U + Sync, + ID: Fn() -> U + Sync, + U: Send, +{ + fn split_off_left(&self) -> Self { + FoldConsumer { + base: self.base.split_off_left(), + ..*self + } + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct FoldFolder<'r, C, ID, F> { + base: C, + fold_op: &'r F, + item: ID, +} + +impl<'r, C, ID, F, T> Folder for FoldFolder<'r, C, ID, F> +where + C: Folder, + F: Fn(ID, T) -> ID + Sync, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let item = (self.fold_op)(self.item, item); + FoldFolder { + base: self.base, + fold_op: self.fold_op, + item, + } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + fn not_full(base: &C) -> impl Fn(&T) -> bool + '_ + where + C: Folder, + { + move |_| !base.full() + } + + let base = self.base; + let item = iter + .into_iter() + // stop iterating if another thread has finished + .take_while(not_full(&base)) + .fold(self.item, self.fold_op); + + FoldFolder { + base, + item, + fold_op: self.fold_op, + } + } + + fn complete(self) -> C::Result { + self.base.consume(self.item).complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} + +// /////////////////////////////////////////////////////////////////////////// + +impl FoldWith +where + I: ParallelIterator, + F: Fn(U, I::Item) -> U + Sync + Send, + U: Send + Clone, +{ + pub(super) fn new(base: I, item: U, fold_op: F) -> Self { + FoldWith { + base, + item, + fold_op, + } + } +} + +/// `FoldWith` is an iterator that applies a 
function over an iterator producing a single value. +/// This struct is created by the [`fold_with()`] method on [`ParallelIterator`] +/// +/// [`fold_with()`]: trait.ParallelIterator.html#method.fold_with +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct FoldWith { + base: I, + item: U, + fold_op: F, +} + +impl Debug for FoldWith { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FoldWith") + .field("base", &self.base) + .field("item", &self.item) + .finish() + } +} + +impl ParallelIterator for FoldWith +where + I: ParallelIterator, + F: Fn(U, I::Item) -> U + Sync + Send, + U: Send + Clone, +{ + type Item = U; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = FoldWithConsumer { + base: consumer, + item: self.item, + fold_op: &self.fold_op, + }; + self.base.drive_unindexed(consumer1) + } +} + +struct FoldWithConsumer<'c, C, U, F> { + base: C, + item: U, + fold_op: &'c F, +} + +impl<'r, U, T, C, F> Consumer for FoldWithConsumer<'r, C, U, F> +where + C: Consumer, + F: Fn(U, T) -> U + Sync, + U: Send + Clone, +{ + type Folder = FoldFolder<'r, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + FoldWithConsumer { + base: left, + item: self.item.clone(), + ..self + }, + FoldWithConsumer { + base: right, + ..self + }, + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + FoldFolder { + base: self.base.into_folder(), + item: self.item, + fold_op: self.fold_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'r, U, T, C, F> UnindexedConsumer for FoldWithConsumer<'r, C, U, F> +where + C: UnindexedConsumer, + F: Fn(U, T) -> U + Sync, + U: Send + Clone, +{ + fn split_off_left(&self) -> Self { + 
FoldWithConsumer { + base: self.base.split_off_left(), + item: self.item.clone(), + ..*self + } + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} diff --git a/src/iter/for_each.rs b/src/iter/for_each.rs new file mode 100644 index 0000000..3b77beb --- /dev/null +++ b/src/iter/for_each.rs @@ -0,0 +1,77 @@ +use super::noop::*; +use super::plumbing::*; +use super::ParallelIterator; + +pub(super) fn for_each(pi: I, op: &F) +where + I: ParallelIterator, + F: Fn(T) + Sync, + T: Send, +{ + let consumer = ForEachConsumer { op }; + pi.drive_unindexed(consumer) +} + +struct ForEachConsumer<'f, F> { + op: &'f F, +} + +impl<'f, F, T> Consumer for ForEachConsumer<'f, F> +where + F: Fn(T) + Sync, +{ + type Folder = ForEachConsumer<'f, F>; + type Reducer = NoopReducer; + type Result = (); + + fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) { + (self.split_off_left(), self, NoopReducer) + } + + fn into_folder(self) -> Self { + self + } + + fn full(&self) -> bool { + false + } +} + +impl<'f, F, T> Folder for ForEachConsumer<'f, F> +where + F: Fn(T) + Sync, +{ + type Result = (); + + fn consume(self, item: T) -> Self { + (self.op)(item); + self + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + iter.into_iter().for_each(self.op); + self + } + + fn complete(self) {} + + fn full(&self) -> bool { + false + } +} + +impl<'f, F, T> UnindexedConsumer for ForEachConsumer<'f, F> +where + F: Fn(T) + Sync, +{ + fn split_off_left(&self) -> Self { + ForEachConsumer { op: self.op } + } + + fn to_reducer(&self) -> NoopReducer { + NoopReducer + } +} diff --git a/src/iter/from_par_iter.rs b/src/iter/from_par_iter.rs new file mode 100644 index 0000000..3240f32 --- /dev/null +++ b/src/iter/from_par_iter.rs @@ -0,0 +1,228 @@ +use super::noop::NoopConsumer; +use super::{FromParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator}; + +use std::borrow::Cow; +use std::collections::LinkedList; +use 
std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::collections::{BinaryHeap, VecDeque}; +use std::hash::{BuildHasher, Hash}; + +/// Creates an empty default collection and extends it. +fn collect_extended(par_iter: I) -> C +where + I: IntoParallelIterator, + C: ParallelExtend + Default, +{ + let mut collection = C::default(); + collection.par_extend(par_iter); + collection +} + +/// Collects items from a parallel iterator into a vector. +impl FromParallelIterator for Vec +where + T: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects items from a parallel iterator into a vecdeque. +impl FromParallelIterator for VecDeque +where + T: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + Vec::from_par_iter(par_iter).into() + } +} + +/// Collects items from a parallel iterator into a binaryheap. +/// The heap-ordering is calculated serially after all items are collected. +impl FromParallelIterator for BinaryHeap +where + T: Ord + Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + Vec::from_par_iter(par_iter).into() + } +} + +/// Collects items from a parallel iterator into a freshly allocated +/// linked list. +impl FromParallelIterator for LinkedList +where + T: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects (key, value) pairs from a parallel iterator into a +/// hashmap. If multiple pairs correspond to the same key, then the +/// ones produced earlier in the parallel iterator will be +/// overwritten, just as with a sequential iterator. 
+impl FromParallelIterator<(K, V)> for HashMap +where + K: Eq + Hash + Send, + V: Send, + S: BuildHasher + Default + Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects (key, value) pairs from a parallel iterator into a +/// btreemap. If multiple pairs correspond to the same key, then the +/// ones produced earlier in the parallel iterator will be +/// overwritten, just as with a sequential iterator. +impl FromParallelIterator<(K, V)> for BTreeMap +where + K: Ord + Send, + V: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects values from a parallel iterator into a hashset. +impl FromParallelIterator for HashSet +where + V: Eq + Hash + Send, + S: BuildHasher + Default + Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects values from a parallel iterator into a btreeset. +impl FromParallelIterator for BTreeSet +where + V: Send + Ord, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects characters from a parallel iterator into a string. +impl FromParallelIterator for String { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects characters from a parallel iterator into a string. +impl<'a> FromParallelIterator<&'a char> for String { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects string slices from a parallel iterator into a string. +impl<'a> FromParallelIterator<&'a str> for String { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects strings from a parallel iterator into one large string. 
+impl FromParallelIterator for String { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + collect_extended(par_iter) + } +} + +/// Collects string slices from a parallel iterator into a string. +impl<'a> FromParallelIterator> for String { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator>, + { + collect_extended(par_iter) + } +} + +/// Collects an arbitrary `Cow` collection. +/// +/// Note, the standard library only has `FromIterator` for `Cow<'a, str>` and +/// `Cow<'a, [T]>`, because no one thought to add a blanket implementation +/// before it was stabilized. +impl<'a, C: ?Sized, T> FromParallelIterator for Cow<'a, C> +where + C: ToOwned, + C::Owned: FromParallelIterator, + T: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + Cow::Owned(C::Owned::from_par_iter(par_iter)) + } +} + +/// Collapses all unit items from a parallel iterator into one. +/// +/// This is more useful when combined with higher-level abstractions, like +/// collecting to a `Result<(), E>` where you only care about errors: +/// +/// ``` +/// use std::io::*; +/// use rayon::prelude::*; +/// +/// let data = vec![1, 2, 3, 4, 5]; +/// let res: Result<()> = data.par_iter() +/// .map(|x| writeln!(stdout(), "{}", x)) +/// .collect(); +/// assert!(res.is_ok()); +/// ``` +impl FromParallelIterator<()> for () { + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator, + { + par_iter.into_par_iter().drive_unindexed(NoopConsumer) + } +} diff --git a/src/iter/inspect.rs b/src/iter/inspect.rs new file mode 100644 index 0000000..9b1cd09 --- /dev/null +++ b/src/iter/inspect.rs @@ -0,0 +1,257 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; +use std::iter; + +/// `Inspect` is an iterator that calls a function with a reference to each +/// element before yielding it. 
+/// +/// This struct is created by the [`inspect()`] method on [`ParallelIterator`] +/// +/// [`inspect()`]: trait.ParallelIterator.html#method.inspect +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Inspect { + base: I, + inspect_op: F, +} + +impl Debug for Inspect { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Inspect").field("base", &self.base).finish() + } +} + +impl Inspect +where + I: ParallelIterator, +{ + /// Creates a new `Inspect` iterator. + pub(super) fn new(base: I, inspect_op: F) -> Self { + Inspect { base, inspect_op } + } +} + +impl ParallelIterator for Inspect +where + I: ParallelIterator, + F: Fn(&I::Item) + Sync + Send, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = InspectConsumer::new(consumer, &self.inspect_op); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for Inspect +where + I: IndexedParallelIterator, + F: Fn(&I::Item) + Sync + Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = InspectConsumer::new(consumer, &self.inspect_op); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + inspect_op: self.inspect_op, + }); + + struct Callback { + callback: CB, + inspect_op: F, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + F: Fn(&T) + Sync, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = InspectProducer { + base, + inspect_op: &self.inspect_op, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct InspectProducer<'f, P, F> { + base: P, + inspect_op: &'f F, +} + +impl<'f, P, F> Producer for InspectProducer<'f, P, F> +where + P: Producer, + F: Fn(&P::Item) + Sync, +{ + type Item = P::Item; + type IntoIter = iter::Inspect; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().inspect(self.inspect_op) + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + InspectProducer { + base: left, + inspect_op: self.inspect_op, + }, + InspectProducer { + base: right, + inspect_op: self.inspect_op, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = InspectFolder { + base: folder, + inspect_op: self.inspect_op, + }; + self.base.fold_with(folder1).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct InspectConsumer<'f, C, F> { + base: C, + inspect_op: &'f F, +} + +impl<'f, C, F> InspectConsumer<'f, C, F> { + fn new(base: C, inspect_op: &'f F) -> Self { + InspectConsumer { base, inspect_op } + } +} + +impl<'f, T, C, F> Consumer for InspectConsumer<'f, C, F> +where + C: Consumer, + F: Fn(&T) + Sync, +{ + type Folder = InspectFolder<'f, C::Folder, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + InspectConsumer::new(left, self.inspect_op), + InspectConsumer::new(right, self.inspect_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + InspectFolder 
{ + base: self.base.into_folder(), + inspect_op: self.inspect_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'f, T, C, F> UnindexedConsumer for InspectConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(&T) + Sync, +{ + fn split_off_left(&self) -> Self { + InspectConsumer::new(self.base.split_off_left(), &self.inspect_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct InspectFolder<'f, C, F> { + base: C, + inspect_op: &'f F, +} + +impl<'f, T, C, F> Folder for InspectFolder<'f, C, F> +where + C: Folder, + F: Fn(&T), +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + (self.inspect_op)(&item); + InspectFolder { + base: self.base.consume(item), + inspect_op: self.inspect_op, + } + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + self.base = self + .base + .consume_iter(iter.into_iter().inspect(self.inspect_op)); + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/interleave.rs b/src/iter/interleave.rs new file mode 100644 index 0000000..b5d43d5 --- /dev/null +++ b/src/iter/interleave.rs @@ -0,0 +1,336 @@ +use super::plumbing::*; +use super::*; +use std::cmp; +use std::iter::Fuse; + +/// `Interleave` is an iterator that interleaves elements of iterators +/// `i` and `j` in one continuous iterator. 
This struct is created by +/// the [`interleave()`] method on [`IndexedParallelIterator`] +/// +/// [`interleave()`]: trait.IndexedParallelIterator.html#method.interleave +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Interleave +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + i: I, + j: J, +} + +impl Interleave +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + /// Creates a new `Interleave` iterator + pub(super) fn new(i: I, j: J) -> Self { + Interleave { i, j } + } +} + +impl ParallelIterator for Interleave +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Interleave +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.i.len().checked_add(self.j.len()).expect("overflow") + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let (i_len, j_len) = (self.i.len(), self.j.len()); + return self.i.with_producer(CallbackI { + callback, + i_len, + j_len, + i_next: false, + j: self.j, + }); + + struct CallbackI { + callback: CB, + i_len: usize, + j_len: usize, + i_next: bool, + j: J, + } + + impl ProducerCallback for CallbackI + where + J: IndexedParallelIterator, + CB: ProducerCallback, + { + type Output = CB::Output; + + fn callback(self, i_producer: I) -> Self::Output + where + I: Producer, + { + self.j.with_producer(CallbackJ { + i_producer, + i_len: self.i_len, + j_len: self.j_len, + i_next: self.i_next, + callback: self.callback, + }) + } + } + + struct 
CallbackJ { + callback: CB, + i_len: usize, + j_len: usize, + i_next: bool, + i_producer: I, + } + + impl ProducerCallback for CallbackJ + where + I: Producer, + CB: ProducerCallback, + { + type Output = CB::Output; + + fn callback(self, j_producer: J) -> Self::Output + where + J: Producer, + { + let producer = InterleaveProducer::new( + self.i_producer, + j_producer, + self.i_len, + self.j_len, + self.i_next, + ); + self.callback.callback(producer) + } + } + } +} + +struct InterleaveProducer +where + I: Producer, + J: Producer, +{ + i: I, + j: J, + i_len: usize, + j_len: usize, + i_next: bool, +} + +impl InterleaveProducer +where + I: Producer, + J: Producer, +{ + fn new(i: I, j: J, i_len: usize, j_len: usize, i_next: bool) -> InterleaveProducer { + InterleaveProducer { + i, + j, + i_len, + j_len, + i_next, + } + } +} + +impl Producer for InterleaveProducer +where + I: Producer, + J: Producer, +{ + type Item = I::Item; + type IntoIter = InterleaveSeq; + + fn into_iter(self) -> Self::IntoIter { + InterleaveSeq { + i: self.i.into_iter().fuse(), + j: self.j.into_iter().fuse(), + i_next: self.i_next, + } + } + + fn min_len(&self) -> usize { + cmp::max(self.i.min_len(), self.j.min_len()) + } + + fn max_len(&self) -> usize { + cmp::min(self.i.max_len(), self.j.max_len()) + } + + /// We know 0 < index <= self.i_len + self.j_len + /// + /// Find a, b satisfying: + /// + /// (1) 0 < a <= self.i_len + /// (2) 0 < b <= self.j_len + /// (3) a + b == index + /// + /// For even splits, set a = b = index/2. 
+ /// For odd splits, set a = (index/2)+1, b = index/2, if `i` + /// should yield the next element, otherwise, if `j` should yield + /// the next element, set a = index/2 and b = (index/2)+1 + fn split_at(self, index: usize) -> (Self, Self) { + #[inline] + fn odd_offset(flag: bool) -> usize { + (!flag) as usize + } + + let even = index % 2 == 0; + let idx = index >> 1; + + // desired split + let (i_idx, j_idx) = ( + idx + odd_offset(even || self.i_next), + idx + odd_offset(even || !self.i_next), + ); + + let (i_split, j_split) = if self.i_len >= i_idx && self.j_len >= j_idx { + (i_idx, j_idx) + } else if self.i_len >= i_idx { + // j too short + (index - self.j_len, self.j_len) + } else { + // i too short + (self.i_len, index - self.i_len) + }; + + let trailing_i_next = even == self.i_next; + let (i_left, i_right) = self.i.split_at(i_split); + let (j_left, j_right) = self.j.split_at(j_split); + + ( + InterleaveProducer::new(i_left, j_left, i_split, j_split, self.i_next), + InterleaveProducer::new( + i_right, + j_right, + self.i_len - i_split, + self.j_len - j_split, + trailing_i_next, + ), + ) + } +} + +/// Wrapper for Interleave to implement DoubleEndedIterator and +/// ExactSizeIterator. +/// +/// This iterator is fused. +struct InterleaveSeq { + i: Fuse, + j: Fuse, + + /// Flag to control which iterator should provide the next element. When + /// `false` then `i` produces the next element, otherwise `j` produces the + /// next element. + i_next: bool, +} + +/// Iterator implementation for InterleaveSeq. This implementation is +/// taken more or less verbatim from itertools. It is replicated here +/// (instead of calling itertools directly), because we also need to +/// implement `DoubleEndedIterator` and `ExactSizeIterator`. 
+impl Iterator for InterleaveSeq +where + I: Iterator, + J: Iterator, +{ + type Item = I::Item; + + #[inline] + fn next(&mut self) -> Option { + self.i_next = !self.i_next; + if self.i_next { + match self.i.next() { + None => self.j.next(), + r => r, + } + } else { + match self.j.next() { + None => self.i.next(), + r => r, + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (ih, jh) = (self.i.size_hint(), self.j.size_hint()); + let min = ih.0.saturating_add(jh.0); + let max = match (ih.1, jh.1) { + (Some(x), Some(y)) => x.checked_add(y), + _ => None, + }; + (min, max) + } +} + +// The implementation for DoubleEndedIterator requires +// ExactSizeIterator to provide `next_back()`. The last element will +// come from the iterator that runs out last (i.e. has the most elements +// in it). If the iterators have the same number of elements, then the +// last iterator will provide the last element. +impl DoubleEndedIterator for InterleaveSeq +where + I: DoubleEndedIterator + ExactSizeIterator, + J: DoubleEndedIterator + ExactSizeIterator, +{ + #[inline] + fn next_back(&mut self) -> Option { + if self.i.len() == self.j.len() { + if self.i_next { + self.i.next_back() + } else { + self.j.next_back() + } + } else if self.i.len() < self.j.len() { + self.j.next_back() + } else { + self.i.next_back() + } + } +} + +impl ExactSizeIterator for InterleaveSeq +where + I: ExactSizeIterator, + J: ExactSizeIterator, +{ + #[inline] + fn len(&self) -> usize { + self.i.len() + self.j.len() + } +} diff --git a/src/iter/interleave_shortest.rs b/src/iter/interleave_shortest.rs new file mode 100644 index 0000000..7d81369 --- /dev/null +++ b/src/iter/interleave_shortest.rs @@ -0,0 +1,85 @@ +use super::plumbing::*; +use super::*; + +/// `InterleaveShortest` is an iterator that works similarly to +/// `Interleave`, but this version stops returning elements once one +/// of the iterators runs out. 
+/// +/// This struct is created by the [`interleave_shortest()`] method on +/// [`IndexedParallelIterator`]. +/// +/// [`interleave_shortest()`]: trait.IndexedParallelIterator.html#method.interleave_shortest +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct InterleaveShortest +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + interleave: Interleave, Take>, +} + +impl InterleaveShortest +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + /// Creates a new `InterleaveShortest` iterator + pub(super) fn new(i: I, j: J) -> Self { + InterleaveShortest { + interleave: if i.len() <= j.len() { + // take equal lengths from both iterators + let n = i.len(); + i.take(n).interleave(j.take(n)) + } else { + // take one extra item from the first iterator + let n = j.len(); + i.take(n + 1).interleave(j.take(n)) + }, + } + } +} + +impl ParallelIterator for InterleaveShortest +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for InterleaveShortest +where + I: IndexedParallelIterator, + J: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.interleave.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + self.interleave.with_producer(callback) + } +} diff --git a/src/iter/intersperse.rs b/src/iter/intersperse.rs new file mode 100644 index 0000000..798bdc1 --- /dev/null +++ b/src/iter/intersperse.rs @@ -0,0 +1,410 @@ +use super::plumbing::*; +use super::*; +use std::cell::Cell; +use std::iter::{self, Fuse}; + +/// 
`Intersperse` is an iterator that inserts a particular item between each +/// item of the adapted iterator. This struct is created by the +/// [`intersperse()`] method on [`ParallelIterator`] +/// +/// [`intersperse()`]: trait.ParallelIterator.html#method.intersperse +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone, Debug)] +pub struct Intersperse +where + I: ParallelIterator, + I::Item: Clone, +{ + base: I, + item: I::Item, +} + +impl Intersperse +where + I: ParallelIterator, + I::Item: Clone, +{ + /// Creates a new `Intersperse` iterator + pub(super) fn new(base: I, item: I::Item) -> Self { + Intersperse { base, item } + } +} + +impl ParallelIterator for Intersperse +where + I: ParallelIterator, + I::Item: Clone + Send, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = IntersperseConsumer::new(consumer, self.item); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + match self.base.opt_len()? 
{ + 0 => Some(0), + len => len.checked_add(len - 1), + } + } +} + +impl IndexedParallelIterator for Intersperse +where + I: IndexedParallelIterator, + I::Item: Clone + Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = IntersperseConsumer::new(consumer, self.item); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + let len = self.base.len(); + if len > 0 { + len.checked_add(len - 1).expect("overflow") + } else { + 0 + } + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let len = self.len(); + return self.base.with_producer(Callback { + callback, + item: self.item, + len, + }); + + struct Callback { + callback: CB, + item: T, + len: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + T: Clone + Send, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = IntersperseProducer::new(base, self.item, self.len); + self.callback.callback(producer) + } + } + } +} + +struct IntersperseProducer

+where + P: Producer, +{ + base: P, + item: P::Item, + len: usize, + clone_first: bool, +} + +impl

IntersperseProducer

+where + P: Producer, +{ + fn new(base: P, item: P::Item, len: usize) -> Self { + IntersperseProducer { + base, + item, + len, + clone_first: false, + } + } +} + +impl

Producer for IntersperseProducer

+where + P: Producer, + P::Item: Clone + Send, +{ + type Item = P::Item; + type IntoIter = IntersperseIter; + + fn into_iter(self) -> Self::IntoIter { + IntersperseIter { + base: self.base.into_iter().fuse(), + item: self.item, + clone_first: self.len > 0 && self.clone_first, + + // If there's more than one item, then even lengths end the opposite + // of how they started with respect to interspersed clones. + clone_last: self.len > 1 && ((self.len & 1 == 0) ^ self.clone_first), + } + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + debug_assert!(index <= self.len); + + // The left needs half of the items from the base producer, and the + // other half will be our interspersed item. If we're not leading with + // a cloned item, then we need to round up the base number of items, + // otherwise round down. + let base_index = (index + !self.clone_first as usize) / 2; + let (left_base, right_base) = self.base.split_at(base_index); + + let left = IntersperseProducer { + base: left_base, + item: self.item.clone(), + len: index, + clone_first: self.clone_first, + }; + + let right = IntersperseProducer { + base: right_base, + item: self.item, + len: self.len - index, + + // If the index is odd, the right side toggles `clone_first`. 
+ clone_first: (index & 1 == 1) ^ self.clone_first, + }; + + (left, right) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + let folder1 = IntersperseFolder { + base: folder, + item: self.item, + clone_first: self.clone_first, + }; + self.base.fold_with(folder1).base + } +} + +struct IntersperseIter +where + I: Iterator, +{ + base: Fuse, + item: I::Item, + clone_first: bool, + clone_last: bool, +} + +impl Iterator for IntersperseIter +where + I: DoubleEndedIterator + ExactSizeIterator, + I::Item: Clone, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + if self.clone_first { + self.clone_first = false; + Some(self.item.clone()) + } else if let next @ Some(_) = self.base.next() { + // If there are any items left, we'll need another clone in front. + self.clone_first = self.base.len() != 0; + next + } else if self.clone_last { + self.clone_last = false; + Some(self.item.clone()) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } +} + +impl DoubleEndedIterator for IntersperseIter +where + I: DoubleEndedIterator + ExactSizeIterator, + I::Item: Clone, +{ + fn next_back(&mut self) -> Option { + if self.clone_last { + self.clone_last = false; + Some(self.item.clone()) + } else if let next_back @ Some(_) = self.base.next_back() { + // If there are any items left, we'll need another clone in back. 
+ self.clone_last = self.base.len() != 0; + next_back + } else if self.clone_first { + self.clone_first = false; + Some(self.item.clone()) + } else { + None + } + } +} + +impl ExactSizeIterator for IntersperseIter +where + I: DoubleEndedIterator + ExactSizeIterator, + I::Item: Clone, +{ + fn len(&self) -> usize { + let len = self.base.len(); + len + len.saturating_sub(1) + self.clone_first as usize + self.clone_last as usize + } +} + +struct IntersperseConsumer { + base: C, + item: T, + clone_first: Cell, +} + +impl IntersperseConsumer +where + C: Consumer, +{ + fn new(base: C, item: T) -> Self { + IntersperseConsumer { + base, + item, + clone_first: false.into(), + } + } +} + +impl Consumer for IntersperseConsumer +where + C: Consumer, + T: Clone + Send, +{ + type Folder = IntersperseFolder; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(mut self, index: usize) -> (Self, Self, Self::Reducer) { + // We'll feed twice as many items to the base consumer, except if we're + // not currently leading with a cloned item, then it's one less. 
+ let base_index = index + index.saturating_sub(!self.clone_first.get() as usize); + let (left, right, reducer) = self.base.split_at(base_index); + + let right = IntersperseConsumer { + base: right, + item: self.item.clone(), + clone_first: true.into(), + }; + self.base = left; + (self, right, reducer) + } + + fn into_folder(self) -> Self::Folder { + IntersperseFolder { + base: self.base.into_folder(), + item: self.item, + clone_first: self.clone_first.get(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl UnindexedConsumer for IntersperseConsumer +where + C: UnindexedConsumer, + T: Clone + Send, +{ + fn split_off_left(&self) -> Self { + let left = IntersperseConsumer { + base: self.base.split_off_left(), + item: self.item.clone(), + clone_first: self.clone_first.clone(), + }; + self.clone_first.set(true); + left + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct IntersperseFolder { + base: C, + item: T, + clone_first: bool, +} + +impl Folder for IntersperseFolder +where + C: Folder, + T: Clone, +{ + type Result = C::Result; + + fn consume(mut self, item: T) -> Self { + if self.clone_first { + self.base = self.base.consume(self.item.clone()); + if self.base.full() { + return self; + } + } else { + self.clone_first = true; + } + self.base = self.base.consume(item); + self + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + let mut clone_first = self.clone_first; + let between_item = self.item; + let base = self.base.consume_iter(iter.into_iter().flat_map(|item| { + let first = if clone_first { + Some(between_item.clone()) + } else { + clone_first = true; + None + }; + first.into_iter().chain(iter::once(item)) + })); + IntersperseFolder { + base, + item: between_item, + clone_first, + } + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/len.rs b/src/iter/len.rs new file mode 100644 
index 0000000..e65b3c0 --- /dev/null +++ b/src/iter/len.rs @@ -0,0 +1,271 @@ +use super::plumbing::*; +use super::*; +use std::cmp; + +/// `MinLen` is an iterator that imposes a minimum length on iterator splits. +/// This struct is created by the [`min_len()`] method on [`IndexedParallelIterator`] +/// +/// [`min_len()`]: trait.IndexedParallelIterator.html#method.min_len +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct MinLen { + base: I, + min: usize, +} + +impl MinLen +where + I: IndexedParallelIterator, +{ + /// Creates a new `MinLen` iterator. + pub(super) fn new(base: I, min: usize) -> Self { + MinLen { base, min } + } +} + +impl ParallelIterator for MinLen +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for MinLen +where + I: IndexedParallelIterator, +{ + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + min: self.min, + }); + + struct Callback { + callback: CB, + min: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = MinLenProducer { + base, + min: self.min, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// `MinLenProducer` implementation + +struct MinLenProducer

{ + base: P, + min: usize, +} + +impl

Producer for MinLenProducer

+where + P: Producer, +{ + type Item = P::Item; + type IntoIter = P::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter() + } + + fn min_len(&self) -> usize { + cmp::max(self.min, self.base.min_len()) + } + + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + MinLenProducer { + base: left, + min: self.min, + }, + MinLenProducer { + base: right, + min: self.min, + }, + ) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.base.fold_with(folder) + } +} + +/// `MaxLen` is an iterator that imposes a maximum length on iterator splits. +/// This struct is created by the [`max_len()`] method on [`IndexedParallelIterator`] +/// +/// [`max_len()`]: trait.IndexedParallelIterator.html#method.max_len +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct MaxLen { + base: I, + max: usize, +} + +impl MaxLen +where + I: IndexedParallelIterator, +{ + /// Creates a new `MaxLen` iterator. 
+ pub(super) fn new(base: I, max: usize) -> Self { + MaxLen { base, max } + } +} + +impl ParallelIterator for MaxLen +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for MaxLen +where + I: IndexedParallelIterator, +{ + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + max: self.max, + }); + + struct Callback { + callback: CB, + max: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = MaxLenProducer { + base, + max: self.max, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// `MaxLenProducer` implementation + +struct MaxLenProducer

{ + base: P, + max: usize, +} + +impl

Producer for MaxLenProducer

+where + P: Producer, +{ + type Item = P::Item; + type IntoIter = P::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter() + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + + fn max_len(&self) -> usize { + cmp::min(self.max, self.base.max_len()) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + MaxLenProducer { + base: left, + max: self.max, + }, + MaxLenProducer { + base: right, + max: self.max, + }, + ) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.base.fold_with(folder) + } +} diff --git a/src/iter/map.rs b/src/iter/map.rs new file mode 100644 index 0000000..f2a35ff --- /dev/null +++ b/src/iter/map.rs @@ -0,0 +1,259 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; +use std::iter; + +/// `Map` is an iterator that transforms the elements of an underlying iterator. +/// +/// This struct is created by the [`map()`] method on [`ParallelIterator`] +/// +/// [`map()`]: trait.ParallelIterator.html#method.map +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Map { + base: I, + map_op: F, +} + +impl Debug for Map { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Map").field("base", &self.base).finish() + } +} + +impl Map +where + I: ParallelIterator, +{ + /// Creates a new `Map` iterator. 
+ pub(super) fn new(base: I, map_op: F) -> Self { + Map { base, map_op } + } +} + +impl ParallelIterator for Map +where + I: ParallelIterator, + F: Fn(I::Item) -> R + Sync + Send, + R: Send, +{ + type Item = F::Output; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = MapConsumer::new(consumer, &self.map_op); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for Map +where + I: IndexedParallelIterator, + F: Fn(I::Item) -> R + Sync + Send, + R: Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = MapConsumer::new(consumer, &self.map_op); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + map_op: self.map_op, + }); + + struct Callback { + callback: CB, + map_op: F, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + F: Fn(T) -> R + Sync, + R: Send, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = MapProducer { + base, + map_op: &self.map_op, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct MapProducer<'f, P, F> { + base: P, + map_op: &'f F, +} + +impl<'f, P, F, R> Producer for MapProducer<'f, P, F> +where + P: Producer, + F: Fn(P::Item) -> R + Sync, + R: Send, +{ + type Item = F::Output; + type IntoIter = iter::Map; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().map(self.map_op) + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + MapProducer { + base: left, + map_op: self.map_op, + }, + MapProducer { + base: right, + map_op: self.map_op, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = MapFolder { + base: folder, + map_op: self.map_op, + }; + self.base.fold_with(folder1).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct MapConsumer<'f, C, F> { + base: C, + map_op: &'f F, +} + +impl<'f, C, F> MapConsumer<'f, C, F> { + fn new(base: C, map_op: &'f F) -> Self { + MapConsumer { base, map_op } + } +} + +impl<'f, T, R, C, F> Consumer for MapConsumer<'f, C, F> +where + C: Consumer, + F: Fn(T) -> R + Sync, + R: Send, +{ + type Folder = MapFolder<'f, C::Folder, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + MapConsumer::new(left, self.map_op), + MapConsumer::new(right, self.map_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + MapFolder { + base: self.base.into_folder(), + map_op: self.map_op, + } + } + + fn full(&self) -> 
bool { + self.base.full() + } +} + +impl<'f, T, R, C, F> UnindexedConsumer for MapConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(T) -> R + Sync, + R: Send, +{ + fn split_off_left(&self) -> Self { + MapConsumer::new(self.base.split_off_left(), &self.map_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct MapFolder<'f, C, F> { + base: C, + map_op: &'f F, +} + +impl<'f, T, R, C, F> Folder for MapFolder<'f, C, F> +where + C: Folder, + F: Fn(T) -> R, +{ + type Result = C::Result; + + fn consume(self, item: T) -> Self { + let mapped_item = (self.map_op)(item); + MapFolder { + base: self.base.consume(mapped_item), + map_op: self.map_op, + } + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + self.base = self.base.consume_iter(iter.into_iter().map(self.map_op)); + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/map_with.rs b/src/iter/map_with.rs new file mode 100644 index 0000000..10b1b4c --- /dev/null +++ b/src/iter/map_with.rs @@ -0,0 +1,573 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `MapWith` is an iterator that transforms the elements of an underlying iterator. +/// +/// This struct is created by the [`map_with()`] method on [`ParallelIterator`] +/// +/// [`map_with()`]: trait.ParallelIterator.html#method.map_with +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct MapWith { + base: I, + item: T, + map_op: F, +} + +impl Debug for MapWith { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MapWith") + .field("base", &self.base) + .field("item", &self.item) + .finish() + } +} + +impl MapWith +where + I: ParallelIterator, +{ + /// Creates a new `MapWith` iterator. 
+ pub(super) fn new(base: I, item: T, map_op: F) -> Self { + MapWith { base, item, map_op } + } +} + +impl ParallelIterator for MapWith +where + I: ParallelIterator, + T: Send + Clone, + F: Fn(&mut T, I::Item) -> R + Sync + Send, + R: Send, +{ + type Item = R; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for MapWith +where + I: IndexedParallelIterator, + T: Send + Clone, + F: Fn(&mut T, I::Item) -> R + Sync + Send, + R: Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = MapWithConsumer::new(consumer, self.item, &self.map_op); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + item: self.item, + map_op: self.map_op, + }); + + struct Callback { + callback: CB, + item: U, + map_op: F, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + U: Send + Clone, + F: Fn(&mut U, T) -> R + Sync, + R: Send, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = MapWithProducer { + base, + item: self.item, + map_op: &self.map_op, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct MapWithProducer<'f, P, U, F> { + base: P, + item: U, + map_op: &'f F, +} + +impl<'f, P, U, F, R> Producer for MapWithProducer<'f, P, U, F> +where + P: Producer, + U: Send + Clone, + F: Fn(&mut U, P::Item) -> R + Sync, + R: Send, +{ + type Item = R; + type IntoIter = MapWithIter<'f, P::IntoIter, U, F>; + + fn into_iter(self) -> Self::IntoIter { + MapWithIter { + base: self.base.into_iter(), + item: self.item, + map_op: self.map_op, + } + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + MapWithProducer { + base: left, + item: self.item.clone(), + map_op: self.map_op, + }, + MapWithProducer { + base: right, + item: self.item, + map_op: self.map_op, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = MapWithFolder { + base: folder, + item: self.item, + map_op: self.map_op, + }; + self.base.fold_with(folder1).base + } +} + +struct MapWithIter<'f, I, U, F> { + base: I, + item: U, + map_op: &'f F, +} + +impl<'f, I, U, F, R> Iterator for MapWithIter<'f, I, U, F> +where + I: Iterator, + F: Fn(&mut U, I::Item) -> R + Sync, + R: Send, +{ + type Item = R; + + fn next(&mut self) -> Option { + let item = self.base.next()?; + Some((self.map_op)(&mut self.item, item)) + } + + fn size_hint(&self) -> (usize, Option) { + self.base.size_hint() + } +} + +impl<'f, I, U, F, R> DoubleEndedIterator for MapWithIter<'f, I, U, F> +where + I: DoubleEndedIterator, + F: Fn(&mut U, I::Item) -> R + Sync, + R: Send, +{ + fn next_back(&mut self) -> Option { + let item = self.base.next_back()?; + 
Some((self.map_op)(&mut self.item, item)) + } +} + +impl<'f, I, U, F, R> ExactSizeIterator for MapWithIter<'f, I, U, F> +where + I: ExactSizeIterator, + F: Fn(&mut U, I::Item) -> R + Sync, + R: Send, +{ +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct MapWithConsumer<'f, C, U, F> { + base: C, + item: U, + map_op: &'f F, +} + +impl<'f, C, U, F> MapWithConsumer<'f, C, U, F> { + fn new(base: C, item: U, map_op: &'f F) -> Self { + MapWithConsumer { base, item, map_op } + } +} + +impl<'f, T, U, R, C, F> Consumer for MapWithConsumer<'f, C, U, F> +where + C: Consumer, + U: Send + Clone, + F: Fn(&mut U, T) -> R + Sync, + R: Send, +{ + type Folder = MapWithFolder<'f, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + MapWithConsumer::new(left, self.item.clone(), self.map_op), + MapWithConsumer::new(right, self.item, self.map_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + MapWithFolder { + base: self.base.into_folder(), + item: self.item, + map_op: self.map_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'f, T, U, R, C, F> UnindexedConsumer for MapWithConsumer<'f, C, U, F> +where + C: UnindexedConsumer, + U: Send + Clone, + F: Fn(&mut U, T) -> R + Sync, + R: Send, +{ + fn split_off_left(&self) -> Self { + MapWithConsumer::new(self.base.split_off_left(), self.item.clone(), self.map_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct MapWithFolder<'f, C, U, F> { + base: C, + item: U, + map_op: &'f F, +} + +impl<'f, T, U, R, C, F> Folder for MapWithFolder<'f, C, U, F> +where + C: Folder, + F: Fn(&mut U, T) -> R, +{ + type Result = C::Result; + + fn consume(mut self, item: T) -> Self { + let mapped_item = (self.map_op)(&mut self.item, item); + self.base = 
self.base.consume(mapped_item); + self + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + fn with<'f, T, U, R>( + item: &'f mut U, + map_op: impl Fn(&mut U, T) -> R + 'f, + ) -> impl FnMut(T) -> R + 'f { + move |x| map_op(item, x) + } + + { + let mapped_iter = iter.into_iter().map(with(&mut self.item, self.map_op)); + self.base = self.base.consume_iter(mapped_iter); + } + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} + +// ------------------------------------------------------------------------------------------------ + +/// `MapInit` is an iterator that transforms the elements of an underlying iterator. +/// +/// This struct is created by the [`map_init()`] method on [`ParallelIterator`] +/// +/// [`map_init()`]: trait.ParallelIterator.html#method.map_init +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct MapInit { + base: I, + init: INIT, + map_op: F, +} + +impl Debug for MapInit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MapInit").field("base", &self.base).finish() + } +} + +impl MapInit +where + I: ParallelIterator, +{ + /// Creates a new `MapInit` iterator. 
+ pub(super) fn new(base: I, init: INIT, map_op: F) -> Self { + MapInit { base, init, map_op } + } +} + +impl ParallelIterator for MapInit +where + I: ParallelIterator, + INIT: Fn() -> T + Sync + Send, + F: Fn(&mut T, I::Item) -> R + Sync + Send, + R: Send, +{ + type Item = R; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for MapInit +where + I: IndexedParallelIterator, + INIT: Fn() -> T + Sync + Send, + F: Fn(&mut T, I::Item) -> R + Sync + Send, + R: Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = MapInitConsumer::new(consumer, &self.init, &self.map_op); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + init: self.init, + map_op: self.map_op, + }); + + struct Callback { + callback: CB, + init: INIT, + map_op: F, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + INIT: Fn() -> U + Sync, + F: Fn(&mut U, T) -> R + Sync, + R: Send, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = MapInitProducer { + base, + init: &self.init, + map_op: &self.map_op, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct MapInitProducer<'f, P, INIT, F> { + base: P, + init: &'f INIT, + map_op: &'f F, +} + +impl<'f, P, INIT, U, F, R> Producer for MapInitProducer<'f, P, INIT, F> +where + P: Producer, + INIT: Fn() -> U + Sync, + F: Fn(&mut U, P::Item) -> R + Sync, + R: Send, +{ + type Item = R; + type IntoIter = MapWithIter<'f, P::IntoIter, U, F>; + + fn into_iter(self) -> Self::IntoIter { + MapWithIter { + base: self.base.into_iter(), + item: (self.init)(), + map_op: self.map_op, + } + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + MapInitProducer { + base: left, + init: self.init, + map_op: self.map_op, + }, + MapInitProducer { + base: right, + init: self.init, + map_op: self.map_op, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = MapWithFolder { + base: folder, + item: (self.init)(), + map_op: self.map_op, + }; + self.base.fold_with(folder1).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct MapInitConsumer<'f, C, INIT, F> { + base: C, + init: &'f INIT, + map_op: &'f F, +} + +impl<'f, C, INIT, F> MapInitConsumer<'f, C, INIT, F> { + fn new(base: C, init: &'f INIT, map_op: &'f F) -> Self { + MapInitConsumer { base, init, map_op } + } +} + +impl<'f, T, INIT, U, R, C, F> Consumer for MapInitConsumer<'f, C, INIT, F> +where + C: Consumer, + INIT: Fn() -> U + Sync, + F: Fn(&mut U, T) -> R + Sync, + R: Send, +{ + type Folder = MapWithFolder<'f, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = 
C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + MapInitConsumer::new(left, self.init, self.map_op), + MapInitConsumer::new(right, self.init, self.map_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + MapWithFolder { + base: self.base.into_folder(), + item: (self.init)(), + map_op: self.map_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'f, T, INIT, U, R, C, F> UnindexedConsumer for MapInitConsumer<'f, C, INIT, F> +where + C: UnindexedConsumer, + INIT: Fn() -> U + Sync, + F: Fn(&mut U, T) -> R + Sync, + R: Send, +{ + fn split_off_left(&self) -> Self { + MapInitConsumer::new(self.base.split_off_left(), self.init, self.map_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} diff --git a/src/iter/mod.rs b/src/iter/mod.rs new file mode 100644 index 0000000..0c82933 --- /dev/null +++ b/src/iter/mod.rs @@ -0,0 +1,3121 @@ +//! Traits for writing parallel programs using an iterator-style interface +//! +//! You will rarely need to interact with this module directly unless you have +//! need to name one of the iterator types. +//! +//! Parallel iterators make it easy to write iterator-like chains that +//! execute in parallel: typically all you have to do is convert the +//! first `.iter()` (or `iter_mut()`, `into_iter()`, etc) method into +//! `par_iter()` (or `par_iter_mut()`, `into_par_iter()`, etc). For +//! example, to compute the sum of the squares of a sequence of +//! integers, one might write: +//! +//! ```rust +//! use rayon::prelude::*; +//! fn sum_of_squares(input: &[i32]) -> i32 { +//! input.par_iter() +//! .map(|i| i * i) +//! .sum() +//! } +//! ``` +//! +//! Or, to increment all the integers in a slice, you could write: +//! +//! ```rust +//! use rayon::prelude::*; +//! fn increment_all(input: &mut [i32]) { +//! input.par_iter_mut() +//! .for_each(|p| *p += 1); +//! } +//! ``` +//! +//! 
To use parallel iterators, first import the traits by adding +//! something like `use rayon::prelude::*` to your module. You can +//! then call `par_iter`, `par_iter_mut`, or `into_par_iter` to get a +//! parallel iterator. Like a [regular iterator][], parallel +//! iterators work by first constructing a computation and then +//! executing it. +//! +//! In addition to `par_iter()` and friends, some types offer other +//! ways to create (or consume) parallel iterators: +//! +//! - Slices (`&[T]`, `&mut [T]`) offer methods like `par_split` and +//! `par_windows`, as well as various parallel sorting +//! operations. See [the `ParallelSlice` trait] for the full list. +//! - Strings (`&str`) offer methods like `par_split` and `par_lines`. +//! See [the `ParallelString` trait] for the full list. +//! - Various collections offer [`par_extend`], which grows a +//! collection given a parallel iterator. (If you don't have a +//! collection to extend, you can use [`collect()`] to create a new +//! one from scratch.) +//! +//! [the `ParallelSlice` trait]: ../slice/trait.ParallelSlice.html +//! [the `ParallelString` trait]: ../str/trait.ParallelString.html +//! [`par_extend`]: trait.ParallelExtend.html +//! [`collect()`]: trait.ParallelIterator.html#method.collect +//! +//! To see the full range of methods available on parallel iterators, +//! check out the [`ParallelIterator`] and [`IndexedParallelIterator`] +//! traits. +//! +//! If you'd like to build a custom parallel iterator, or to write your own +//! combinator, then check out the [split] function and the [plumbing] module. +//! +//! [regular iterator]: http://doc.rust-lang.org/std/iter/trait.Iterator.html +//! [`ParallelIterator`]: trait.ParallelIterator.html +//! [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +//! [split]: fn.split.html +//! [plumbing]: plumbing/index.html +//! +//! Note: Several of the `ParallelIterator` methods rely on a `Try` trait which +//! 
has been deliberately obscured from the public API. This trait is intended +//! to mirror the unstable `std::ops::Try` with implementations for `Option` and +//! `Result`, where `Some`/`Ok` values will let those iterators continue, but +//! `None`/`Err` values will exit early. +//! +//! A note about object safety: It is currently _not_ possible to wrap +//! a `ParallelIterator` (or any trait that depends on it) using a +//! `Box` or other kind of dynamic allocation, +//! because `ParallelIterator` is **not object-safe**. +//! (This keeps the implementation simpler and allows extra optimizations.) + +use self::plumbing::*; +use self::private::Try; +pub use either::Either; +use std::cmp::{self, Ordering}; +use std::iter::{Product, Sum}; +use std::ops::{Fn, RangeBounds}; + +pub mod plumbing; + +#[cfg(test)] +mod test; + +// There is a method to the madness here: +// +// - These modules are private but expose certain types to the end-user +// (e.g., `enumerate::Enumerate`) -- specifically, the types that appear in the +// public API surface of the `ParallelIterator` traits. +// - In **this** module, those public types are always used unprefixed, which forces +// us to add a `pub use` and helps identify if we missed anything. +// - In contrast, items that appear **only** in the body of a method, +// e.g. `find::find()`, are always used **prefixed**, so that they +// can be readily distinguished. 
+ +mod chain; +mod chunks; +mod cloned; +mod collect; +mod copied; +mod empty; +mod enumerate; +mod extend; +mod filter; +mod filter_map; +mod find; +mod find_first_last; +mod flat_map; +mod flat_map_iter; +mod flatten; +mod flatten_iter; +mod fold; +mod for_each; +mod from_par_iter; +mod inspect; +mod interleave; +mod interleave_shortest; +mod intersperse; +mod len; +mod map; +mod map_with; +mod multizip; +mod noop; +mod once; +mod panic_fuse; +mod par_bridge; +mod positions; +mod product; +mod reduce; +mod repeat; +mod rev; +mod skip; +mod splitter; +mod sum; +mod take; +mod try_fold; +mod try_reduce; +mod try_reduce_with; +mod unzip; +mod update; +mod while_some; +mod zip; +mod zip_eq; + +pub use self::{ + chain::Chain, + chunks::Chunks, + cloned::Cloned, + copied::Copied, + empty::{empty, Empty}, + enumerate::Enumerate, + filter::Filter, + filter_map::FilterMap, + flat_map::FlatMap, + flat_map_iter::FlatMapIter, + flatten::Flatten, + flatten_iter::FlattenIter, + fold::{Fold, FoldWith}, + inspect::Inspect, + interleave::Interleave, + interleave_shortest::InterleaveShortest, + intersperse::Intersperse, + len::{MaxLen, MinLen}, + map::Map, + map_with::{MapInit, MapWith}, + multizip::MultiZip, + once::{once, Once}, + panic_fuse::PanicFuse, + par_bridge::{IterBridge, ParallelBridge}, + positions::Positions, + repeat::{repeat, repeatn, Repeat, RepeatN}, + rev::Rev, + skip::Skip, + splitter::{split, Split}, + take::Take, + try_fold::{TryFold, TryFoldWith}, + update::Update, + while_some::WhileSome, + zip::Zip, + zip_eq::ZipEq, +}; + +mod step_by; +#[cfg(step_by)] +pub use self::step_by::StepBy; + +/// `IntoParallelIterator` implements the conversion to a [`ParallelIterator`]. +/// +/// By implementing `IntoParallelIterator` for a type, you define how it will +/// transformed into an iterator. This is a parallel version of the standard +/// library's [`std::iter::IntoIterator`] trait. 
+/// +/// [`ParallelIterator`]: trait.ParallelIterator.html +/// [`std::iter::IntoIterator`]: https://doc.rust-lang.org/std/iter/trait.IntoIterator.html +pub trait IntoParallelIterator { + /// The parallel iterator type that will be created. + type Iter: ParallelIterator; + + /// The type of item that the parallel iterator will produce. + type Item: Send; + + /// Converts `self` into a parallel iterator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// println!("counting in parallel:"); + /// (0..100).into_par_iter() + /// .for_each(|i| println!("{}", i)); + /// ``` + /// + /// This conversion is often implicit for arguments to methods like [`zip`]. + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let v: Vec<_> = (0..5).into_par_iter().zip(5..10).collect(); + /// assert_eq!(v, [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]); + /// ``` + /// + /// [`zip`]: trait.IndexedParallelIterator.html#method.zip + fn into_par_iter(self) -> Self::Iter; +} + +/// `IntoParallelRefIterator` implements the conversion to a +/// [`ParallelIterator`], providing shared references to the data. +/// +/// This is a parallel version of the `iter()` method +/// defined by various collections. +/// +/// This trait is automatically implemented +/// `for I where &I: IntoParallelIterator`. In most cases, users +/// will want to implement [`IntoParallelIterator`] rather than implement +/// this trait directly. +/// +/// [`ParallelIterator`]: trait.ParallelIterator.html +/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html +pub trait IntoParallelRefIterator<'data> { + /// The type of the parallel iterator that will be returned. + type Iter: ParallelIterator; + + /// The type of item that the parallel iterator will produce. + /// This will typically be an `&'data T` reference type. + type Item: Send + 'data; + + /// Converts `self` into a parallel iterator. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let v: Vec<_> = (0..100).collect(); + /// assert_eq!(v.par_iter().sum::(), 100 * 99 / 2); + /// + /// // `v.par_iter()` is shorthand for `(&v).into_par_iter()`, + /// // producing the exact same references. + /// assert!(v.par_iter().zip(&v) + /// .all(|(a, b)| std::ptr::eq(a, b))); + /// ``` + fn par_iter(&'data self) -> Self::Iter; +} + +impl<'data, I: 'data + ?Sized> IntoParallelRefIterator<'data> for I +where + &'data I: IntoParallelIterator, +{ + type Iter = <&'data I as IntoParallelIterator>::Iter; + type Item = <&'data I as IntoParallelIterator>::Item; + + fn par_iter(&'data self) -> Self::Iter { + self.into_par_iter() + } +} + +/// `IntoParallelRefMutIterator` implements the conversion to a +/// [`ParallelIterator`], providing mutable references to the data. +/// +/// This is a parallel version of the `iter_mut()` method +/// defined by various collections. +/// +/// This trait is automatically implemented +/// `for I where &mut I: IntoParallelIterator`. In most cases, users +/// will want to implement [`IntoParallelIterator`] rather than implement +/// this trait directly. +/// +/// [`ParallelIterator`]: trait.ParallelIterator.html +/// [`IntoParallelIterator`]: trait.IntoParallelIterator.html +pub trait IntoParallelRefMutIterator<'data> { + /// The type of iterator that will be created. + type Iter: ParallelIterator; + + /// The type of item that will be produced; this is typically an + /// `&'data mut T` reference. + type Item: Send + 'data; + + /// Creates the parallel iterator from `self`. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = vec![0usize; 5]; + /// v.par_iter_mut().enumerate().for_each(|(i, x)| *x = i); + /// assert_eq!(v, [0, 1, 2, 3, 4]); + /// ``` + fn par_iter_mut(&'data mut self) -> Self::Iter; +} + +impl<'data, I: 'data + ?Sized> IntoParallelRefMutIterator<'data> for I +where + &'data mut I: IntoParallelIterator, +{ + type Iter = <&'data mut I as IntoParallelIterator>::Iter; + type Item = <&'data mut I as IntoParallelIterator>::Item; + + fn par_iter_mut(&'data mut self) -> Self::Iter { + self.into_par_iter() + } +} + +/// Parallel version of the standard iterator trait. +/// +/// The combinators on this trait are available on **all** parallel +/// iterators. Additional methods can be found on the +/// [`IndexedParallelIterator`] trait: those methods are only +/// available for parallel iterators where the number of items is +/// known in advance (so, e.g., after invoking `filter`, those methods +/// become unavailable). +/// +/// For examples of using parallel iterators, see [the docs on the +/// `iter` module][iter]. +/// +/// [iter]: index.html +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +pub trait ParallelIterator: Sized + Send { + /// The type of item that this parallel iterator produces. + /// For example, if you use the [`for_each`] method, this is the type of + /// item that your closure will be invoked with. + /// + /// [`for_each`]: #method.for_each + type Item: Send; + + /// Executes `OP` on each item produced by the iterator, in parallel. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// (0..100).into_par_iter().for_each(|x| println!("{:?}", x)); + /// ``` + fn for_each(self, op: OP) + where + OP: Fn(Self::Item) + Sync + Send, + { + for_each::for_each(self, &op) + } + + /// Executes `OP` on the given `init` value with each item produced by + /// the iterator, in parallel. 
+ /// + /// The `init` value will be cloned only as needed to be paired with + /// the group of items in each rayon job. It does not require the type + /// to be `Sync`. + /// + /// # Examples + /// + /// ``` + /// use std::sync::mpsc::channel; + /// use rayon::prelude::*; + /// + /// let (sender, receiver) = channel(); + /// + /// (0..5).into_par_iter().for_each_with(sender, |s, x| s.send(x).unwrap()); + /// + /// let mut res: Vec<_> = receiver.iter().collect(); + /// + /// res.sort(); + /// + /// assert_eq!(&res[..], &[0, 1, 2, 3, 4]) + /// ``` + fn for_each_with(self, init: T, op: OP) + where + OP: Fn(&mut T, Self::Item) + Sync + Send, + T: Send + Clone, + { + self.map_with(init, op).collect() + } + + /// Executes `OP` on a value returned by `init` with each item produced by + /// the iterator, in parallel. + /// + /// The `init` function will be called only as needed for a value to be + /// paired with the group of items in each rayon job. There is no + /// constraint on that returned type at all! + /// + /// # Examples + /// + /// ``` + /// use rand::Rng; + /// use rayon::prelude::*; + /// + /// let mut v = vec![0u8; 1_000_000]; + /// + /// v.par_chunks_mut(1000) + /// .for_each_init( + /// || rand::thread_rng(), + /// |rng, chunk| rng.fill(chunk), + /// ); + /// + /// // There's a remote chance that this will fail... + /// for i in 0u8..=255 { + /// assert!(v.contains(&i)); + /// } + /// ``` + fn for_each_init(self, init: INIT, op: OP) + where + OP: Fn(&mut T, Self::Item) + Sync + Send, + INIT: Fn() -> T + Sync + Send, + { + self.map_init(init, op).collect() + } + + /// Executes a fallible `OP` on each item produced by the iterator, in parallel. + /// + /// If the `OP` returns `Result::Err` or `Option::None`, we will attempt to + /// stop processing the rest of the items in the iterator as soon as + /// possible, and we will return that terminating value. Otherwise, we will + /// return an empty `Result::Ok(())` or `Option::Some(())`. 
If there are + /// multiple errors in parallel, it is not specified which will be returned. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::io::{self, Write}; + /// + /// // This will stop iteration early if there's any write error, like + /// // having piped output get closed on the other end. + /// (0..100).into_par_iter() + /// .try_for_each(|x| writeln!(io::stdout(), "{:?}", x)) + /// .expect("expected no write errors"); + /// ``` + fn try_for_each(self, op: OP) -> R + where + OP: Fn(Self::Item) -> R + Sync + Send, + R: Try + Send, + { + fn ok>(_: (), _: ()) -> R { + R::from_ok(()) + } + + self.map(op).try_reduce(<()>::default, ok) + } + + /// Executes a fallible `OP` on the given `init` value with each item + /// produced by the iterator, in parallel. + /// + /// This combines the `init` semantics of [`for_each_with()`] and the + /// failure semantics of [`try_for_each()`]. + /// + /// [`for_each_with()`]: #method.for_each_with + /// [`try_for_each()`]: #method.try_for_each + /// + /// # Examples + /// + /// ``` + /// use std::sync::mpsc::channel; + /// use rayon::prelude::*; + /// + /// let (sender, receiver) = channel(); + /// + /// (0..5).into_par_iter() + /// .try_for_each_with(sender, |s, x| s.send(x)) + /// .expect("expected no send errors"); + /// + /// let mut res: Vec<_> = receiver.iter().collect(); + /// + /// res.sort(); + /// + /// assert_eq!(&res[..], &[0, 1, 2, 3, 4]) + /// ``` + fn try_for_each_with(self, init: T, op: OP) -> R + where + OP: Fn(&mut T, Self::Item) -> R + Sync + Send, + T: Send + Clone, + R: Try + Send, + { + fn ok>(_: (), _: ()) -> R { + R::from_ok(()) + } + + self.map_with(init, op).try_reduce(<()>::default, ok) + } + + /// Executes a fallible `OP` on a value returned by `init` with each item + /// produced by the iterator, in parallel. + /// + /// This combines the `init` semantics of [`for_each_init()`] and the + /// failure semantics of [`try_for_each()`]. 
+ /// + /// [`for_each_init()`]: #method.for_each_init + /// [`try_for_each()`]: #method.try_for_each + /// + /// # Examples + /// + /// ``` + /// use rand::Rng; + /// use rayon::prelude::*; + /// + /// let mut v = vec![0u8; 1_000_000]; + /// + /// v.par_chunks_mut(1000) + /// .try_for_each_init( + /// || rand::thread_rng(), + /// |rng, chunk| rng.try_fill(chunk), + /// ) + /// .expect("expected no rand errors"); + /// + /// // There's a remote chance that this will fail... + /// for i in 0u8..=255 { + /// assert!(v.contains(&i)); + /// } + /// ``` + fn try_for_each_init(self, init: INIT, op: OP) -> R + where + OP: Fn(&mut T, Self::Item) -> R + Sync + Send, + INIT: Fn() -> T + Sync + Send, + R: Try + Send, + { + fn ok>(_: (), _: ()) -> R { + R::from_ok(()) + } + + self.map_init(init, op).try_reduce(<()>::default, ok) + } + + /// Counts the number of items in this parallel iterator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let count = (0..100).into_par_iter().count(); + /// + /// assert_eq!(count, 100); + /// ``` + fn count(self) -> usize { + fn one(_: T) -> usize { + 1 + } + + self.map(one).sum() + } + + /// Applies `map_op` to each item of this iterator, producing a new + /// iterator with the results. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut par_iter = (0..5).into_par_iter().map(|x| x * 2); + /// + /// let doubles: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]); + /// ``` + fn map(self, map_op: F) -> Map + where + F: Fn(Self::Item) -> R + Sync + Send, + R: Send, + { + Map::new(self, map_op) + } + + /// Applies `map_op` to the given `init` value with each item of this + /// iterator, producing a new iterator with the results. + /// + /// The `init` value will be cloned only as needed to be paired with + /// the group of items in each rayon job. It does not require the type + /// to be `Sync`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::sync::mpsc::channel; + /// use rayon::prelude::*; + /// + /// let (sender, receiver) = channel(); + /// + /// let a: Vec<_> = (0..5) + /// .into_par_iter() // iterating over i32 + /// .map_with(sender, |s, x| { + /// s.send(x).unwrap(); // sending i32 values through the channel + /// x // returning i32 + /// }) + /// .collect(); // collecting the returned values into a vector + /// + /// let mut b: Vec<_> = receiver.iter() // iterating over the values in the channel + /// .collect(); // and collecting them + /// b.sort(); + /// + /// assert_eq!(a, b); + /// ``` + fn map_with(self, init: T, map_op: F) -> MapWith + where + F: Fn(&mut T, Self::Item) -> R + Sync + Send, + T: Send + Clone, + R: Send, + { + MapWith::new(self, init, map_op) + } + + /// Applies `map_op` to a value returned by `init` with each item of this + /// iterator, producing a new iterator with the results. + /// + /// The `init` function will be called only as needed for a value to be + /// paired with the group of items in each rayon job. There is no + /// constraint on that returned type at all! + /// + /// # Examples + /// + /// ``` + /// use rand::Rng; + /// use rayon::prelude::*; + /// + /// let a: Vec<_> = (1i32..1_000_000) + /// .into_par_iter() + /// .map_init( + /// || rand::thread_rng(), // get the thread-local RNG + /// |rng, x| if rng.gen() { // randomly negate items + /// -x + /// } else { + /// x + /// }, + /// ).collect(); + /// + /// // There's a remote chance that this will fail... + /// assert!(a.iter().any(|&x| x < 0)); + /// assert!(a.iter().any(|&x| x > 0)); + /// ``` + fn map_init(self, init: INIT, map_op: F) -> MapInit + where + F: Fn(&mut T, Self::Item) -> R + Sync + Send, + INIT: Fn() -> T + Sync + Send, + R: Send, + { + MapInit::new(self, init, map_op) + } + + /// Creates an iterator which clones all of its elements. 
This may be + /// useful when you have an iterator over `&T`, but you need `T`, and + /// that type implements `Clone`. See also [`copied()`]. + /// + /// [`copied()`]: #method.copied + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3]; + /// + /// let v_cloned: Vec<_> = a.par_iter().cloned().collect(); + /// + /// // cloned is the same as .map(|&x| x), for integers + /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect(); + /// + /// assert_eq!(v_cloned, vec![1, 2, 3]); + /// assert_eq!(v_map, vec![1, 2, 3]); + /// ``` + fn cloned<'a, T>(self) -> Cloned + where + T: 'a + Clone + Send, + Self: ParallelIterator, + { + Cloned::new(self) + } + + /// Creates an iterator which copies all of its elements. This may be + /// useful when you have an iterator over `&T`, but you need `T`, and + /// that type implements `Copy`. See also [`cloned()`]. + /// + /// [`cloned()`]: #method.cloned + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3]; + /// + /// let v_copied: Vec<_> = a.par_iter().copied().collect(); + /// + /// // copied is the same as .map(|&x| x), for integers + /// let v_map: Vec<_> = a.par_iter().map(|&x| x).collect(); + /// + /// assert_eq!(v_copied, vec![1, 2, 3]); + /// assert_eq!(v_map, vec![1, 2, 3]); + /// ``` + fn copied<'a, T>(self) -> Copied + where + T: 'a + Copy + Send, + Self: ParallelIterator, + { + Copied::new(self) + } + + /// Applies `inspect_op` to a reference to each item of this iterator, + /// producing a new iterator passing through the original items. This is + /// often useful for debugging to see what's happening in iterator stages. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 4, 2, 3]; + /// + /// // this iterator sequence is complex. 
+ /// let sum = a.par_iter() + /// .cloned() + /// .filter(|&x| x % 2 == 0) + /// .reduce(|| 0, |sum, i| sum + i); + /// + /// println!("{}", sum); + /// + /// // let's add some inspect() calls to investigate what's happening + /// let sum = a.par_iter() + /// .cloned() + /// .inspect(|x| println!("about to filter: {}", x)) + /// .filter(|&x| x % 2 == 0) + /// .inspect(|x| println!("made it through filter: {}", x)) + /// .reduce(|| 0, |sum, i| sum + i); + /// + /// println!("{}", sum); + /// ``` + fn inspect(self, inspect_op: OP) -> Inspect + where + OP: Fn(&Self::Item) + Sync + Send, + { + Inspect::new(self, inspect_op) + } + + /// Mutates each item of this iterator before yielding it. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let par_iter = (0..5).into_par_iter().update(|x| {*x *= 2;}); + /// + /// let doubles: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&doubles[..], &[0, 2, 4, 6, 8]); + /// ``` + fn update(self, update_op: F) -> Update + where + F: Fn(&mut Self::Item) + Sync + Send, + { + Update::new(self, update_op) + } + + /// Applies `filter_op` to each item of this iterator, producing a new + /// iterator with only the items that gave `true` results. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut par_iter = (0..10).into_par_iter().filter(|x| x % 2 == 0); + /// + /// let even_numbers: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&even_numbers[..], &[0, 2, 4, 6, 8]); + /// ``` + fn filter

(self, filter_op: P) -> Filter + where + P: Fn(&Self::Item) -> bool + Sync + Send, + { + Filter::new(self, filter_op) + } + + /// Applies `filter_op` to each item of this iterator to get an `Option`, + /// producing a new iterator with only the items from `Some` results. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut par_iter = (0..10).into_par_iter() + /// .filter_map(|x| { + /// if x % 2 == 0 { Some(x * 3) } + /// else { None } + /// }); + /// + /// let even_numbers: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&even_numbers[..], &[0, 6, 12, 18, 24]); + /// ``` + fn filter_map(self, filter_op: P) -> FilterMap + where + P: Fn(Self::Item) -> Option + Sync + Send, + R: Send, + { + FilterMap::new(self, filter_op) + } + + /// Applies `map_op` to each item of this iterator to get nested parallel iterators, + /// producing a new parallel iterator that flattens these back into one. + /// + /// See also [`flat_map_iter`](#method.flat_map_iter). + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]]; + /// + /// let par_iter = a.par_iter().cloned().flat_map(|a| a.to_vec()); + /// + /// let vec: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]); + /// ``` + fn flat_map(self, map_op: F) -> FlatMap + where + F: Fn(Self::Item) -> PI + Sync + Send, + PI: IntoParallelIterator, + { + FlatMap::new(self, map_op) + } + + /// Applies `map_op` to each item of this iterator to get nested serial iterators, + /// producing a new parallel iterator that flattens these back into one. + /// + /// # `flat_map_iter` versus `flat_map` + /// + /// These two methods are similar but behave slightly differently. With [`flat_map`], + /// each of the nested iterators must be a parallel iterator, and they will be further + /// split up with nested parallelism. 
With `flat_map_iter`, each nested iterator is a + /// sequential `Iterator`, and we only parallelize _between_ them, while the items + /// produced by each nested iterator are processed sequentially. + /// + /// When choosing between these methods, consider whether nested parallelism suits the + /// potential iterators at hand. If there's little computation involved, or its length + /// is much less than the outer parallel iterator, then it may perform better to avoid + /// the overhead of parallelism, just flattening sequentially with `flat_map_iter`. + /// If there is a lot of computation, potentially outweighing the outer parallel + /// iterator, then the nested parallelism of `flat_map` may be worthwhile. + /// + /// [`flat_map`]: #method.flat_map + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::cell::RefCell; + /// + /// let a = [[1, 2], [3, 4], [5, 6], [7, 8]]; + /// + /// let par_iter = a.par_iter().flat_map_iter(|a| { + /// // The serial iterator doesn't have to be thread-safe, just its items. + /// let cell_iter = RefCell::new(a.iter().cloned()); + /// std::iter::from_fn(move || cell_iter.borrow_mut().next()) + /// }); + /// + /// let vec: Vec<_> = par_iter.collect(); + /// + /// assert_eq!(&vec[..], &[1, 2, 3, 4, 5, 6, 7, 8]); + /// ``` + fn flat_map_iter(self, map_op: F) -> FlatMapIter + where + F: Fn(Self::Item) -> SI + Sync + Send, + SI: IntoIterator, + SI::Item: Send, + { + FlatMapIter::new(self, map_op) + } + + /// An adaptor that flattens parallel-iterable `Item`s into one large iterator. + /// + /// See also [`flatten_iter`](#method.flatten_iter). 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let x: Vec> = vec![vec![1, 2], vec![3, 4]]; + /// let y: Vec<_> = x.into_par_iter().flatten().collect(); + /// + /// assert_eq!(y, vec![1, 2, 3, 4]); + /// ``` + fn flatten(self) -> Flatten + where + Self::Item: IntoParallelIterator, + { + Flatten::new(self) + } + + /// An adaptor that flattens serial-iterable `Item`s into one large iterator. + /// + /// See also [`flatten`](#method.flatten) and the analogous comparison of + /// [`flat_map_iter` versus `flat_map`](#flat_map_iter-versus-flat_map). + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let x: Vec> = vec![vec![1, 2], vec![3, 4]]; + /// let iters: Vec<_> = x.into_iter().map(Vec::into_iter).collect(); + /// let y: Vec<_> = iters.into_par_iter().flatten_iter().collect(); + /// + /// assert_eq!(y, vec![1, 2, 3, 4]); + /// ``` + fn flatten_iter(self) -> FlattenIter + where + Self::Item: IntoIterator, + ::Item: Send, + { + FlattenIter::new(self) + } + + /// Reduces the items in the iterator into one item using `op`. + /// The argument `identity` should be a closure that can produce an + /// "identity" value which may be inserted into the sequence as + /// needed to create opportunities for parallel execution. So, for + /// example, if you are doing a summation, then `identity()` ought + /// to produce something that represents the zero for your type + /// (but consider just calling `sum()` in that case). + /// + /// # Examples + /// + /// ``` + /// // Iterate over a sequence of pairs `(x0, y0), ..., (xN, yN)` + /// // and use reduce to compute one pair `(x0 + ... + xN, y0 + ... + yN)` + /// // where the first/second elements are summed separately. 
+ /// use rayon::prelude::*; + /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)] + /// .par_iter() // iterating over &(i32, i32) + /// .cloned() // iterating over (i32, i32) + /// .reduce(|| (0, 0), // the "identity" is 0 in both columns + /// |a, b| (a.0 + b.0, a.1 + b.1)); + /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9)); + /// ``` + /// + /// **Note:** unlike a sequential `fold` operation, the order in + /// which `op` will be applied to reduce the result is not fully + /// specified. So `op` should be [associative] or else the results + /// will be non-deterministic. And of course `identity()` should + /// produce a true identity. + /// + /// [associative]: https://en.wikipedia.org/wiki/Associative_property + fn reduce(self, identity: ID, op: OP) -> Self::Item + where + OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send, + ID: Fn() -> Self::Item + Sync + Send, + { + reduce::reduce(self, identity, op) + } + + /// Reduces the items in the iterator into one item using `op`. + /// If the iterator is empty, `None` is returned; otherwise, + /// `Some` is returned. + /// + /// This version of `reduce` is simple but somewhat less + /// efficient. If possible, it is better to call `reduce()`, which + /// requires an identity element. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let sums = [(0, 1), (5, 6), (16, 2), (8, 9)] + /// .par_iter() // iterating over &(i32, i32) + /// .cloned() // iterating over (i32, i32) + /// .reduce_with(|a, b| (a.0 + b.0, a.1 + b.1)) + /// .unwrap(); + /// assert_eq!(sums, (0 + 5 + 16 + 8, 1 + 6 + 2 + 9)); + /// ``` + /// + /// **Note:** unlike a sequential `fold` operation, the order in + /// which `op` will be applied to reduce the result is not fully + /// specified. So `op` should be [associative] or else the results + /// will be non-deterministic. 
+ /// + /// [associative]: https://en.wikipedia.org/wiki/Associative_property + fn reduce_with(self, op: OP) -> Option + where + OP: Fn(Self::Item, Self::Item) -> Self::Item + Sync + Send, + { + fn opt_fold(op: impl Fn(T, T) -> T) -> impl Fn(Option, T) -> Option { + move |opt_a, b| match opt_a { + Some(a) => Some(op(a, b)), + None => Some(b), + } + } + + fn opt_reduce(op: impl Fn(T, T) -> T) -> impl Fn(Option, Option) -> Option { + move |opt_a, opt_b| match (opt_a, opt_b) { + (Some(a), Some(b)) => Some(op(a, b)), + (Some(v), None) | (None, Some(v)) => Some(v), + (None, None) => None, + } + } + + self.fold(<_>::default, opt_fold(&op)) + .reduce(<_>::default, opt_reduce(&op)) + } + + /// Reduces the items in the iterator into one item using a fallible `op`. + /// The `identity` argument is used the same way as in [`reduce()`]. + /// + /// [`reduce()`]: #method.reduce + /// + /// If a `Result::Err` or `Option::None` item is found, or if `op` reduces + /// to one, we will attempt to stop processing the rest of the items in the + /// iterator as soon as possible, and we will return that terminating value. + /// Otherwise, we will return the final reduced `Result::Ok(T)` or + /// `Option::Some(T)`. If there are multiple errors in parallel, it is not + /// specified which will be returned. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// // Compute the sum of squares, being careful about overflow. + /// fn sum_squares>(iter: I) -> Option { + /// iter.into_par_iter() + /// .map(|i| i.checked_mul(i)) // square each item, + /// .try_reduce(|| 0, i32::checked_add) // and add them up! 
+ /// } + /// assert_eq!(sum_squares(0..5), Some(0 + 1 + 4 + 9 + 16)); + /// + /// // The sum might overflow + /// assert_eq!(sum_squares(0..10_000), None); + /// + /// // Or the squares might overflow before it even reaches `try_reduce` + /// assert_eq!(sum_squares(1_000_000..1_000_001), None); + /// ``` + fn try_reduce(self, identity: ID, op: OP) -> Self::Item + where + OP: Fn(T, T) -> Self::Item + Sync + Send, + ID: Fn() -> T + Sync + Send, + Self::Item: Try, + { + try_reduce::try_reduce(self, identity, op) + } + + /// Reduces the items in the iterator into one item using a fallible `op`. + /// + /// Like [`reduce_with()`], if the iterator is empty, `None` is returned; + /// otherwise, `Some` is returned. Beyond that, it behaves like + /// [`try_reduce()`] for handling `Err`/`None`. + /// + /// [`reduce_with()`]: #method.reduce_with + /// [`try_reduce()`]: #method.try_reduce + /// + /// For instance, with `Option` items, the return value may be: + /// - `None`, the iterator was empty + /// - `Some(None)`, we stopped after encountering `None`. + /// - `Some(Some(x))`, the entire iterator reduced to `x`. + /// + /// With `Result` items, the nesting is more obvious: + /// - `None`, the iterator was empty + /// - `Some(Err(e))`, we stopped after encountering an error `e`. + /// - `Some(Ok(x))`, the entire iterator reduced to `x`. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let files = ["/dev/null", "/does/not/exist"]; + /// + /// // Find the biggest file + /// files.into_par_iter() + /// .map(|path| std::fs::metadata(path).map(|m| (path, m.len()))) + /// .try_reduce_with(|a, b| { + /// Ok(if a.1 >= b.1 { a } else { b }) + /// }) + /// .expect("Some value, since the iterator is not empty") + /// .expect_err("not found"); + /// ``` + fn try_reduce_with(self, op: OP) -> Option + where + OP: Fn(T, T) -> Self::Item + Sync + Send, + Self::Item: Try, + { + try_reduce_with::try_reduce_with(self, op) + } + + /// Parallel fold is similar to sequential fold except that the + /// sequence of items may be subdivided before it is + /// folded. Consider a list of numbers like `22 3 77 89 46`. If + /// you used sequential fold to add them (`fold(0, |a,b| a+b)`), + /// you would wind up first adding 0 + 22, then 22 + 3, then 25 + + /// 77, and so forth. The **parallel fold** works similarly except + /// that it first breaks up your list into sublists, and hence + /// instead of yielding up a single sum at the end, it yields up + /// multiple sums. The number of results is nondeterministic, as + /// is the point where the breaks occur. + /// + /// So if we did the same parallel fold (`fold(0, |a,b| a+b)`) on + /// our example list, we might wind up with a sequence of two numbers, + /// like so: + /// + /// ```notrust + /// 22 3 77 89 46 + /// | | + /// 102 135 + /// ``` + /// + /// Or perhaps these three numbers: + /// + /// ```notrust + /// 22 3 77 89 46 + /// | | | + /// 102 89 46 + /// ``` + /// + /// In general, Rayon will attempt to find good breaking points + /// that keep all of your cores busy. + /// + /// ### Fold versus reduce + /// + /// The `fold()` and `reduce()` methods each take an identity element + /// and a combining function, but they operate rather differently. 
+ /// + /// `reduce()` requires that the identity function has the same + /// type as the things you are iterating over, and it fully + /// reduces the list of items into a single item. So, for example, + /// imagine we are iterating over a list of bytes `bytes: [128_u8, + /// 64_u8, 64_u8]`. If we used `bytes.reduce(|| 0_u8, |a: u8, b: + /// u8| a + b)`, we would get an overflow. This is because `0`, + /// `a`, and `b` here are all bytes, just like the numbers in the + /// list (I wrote the types explicitly above, but those are the + /// only types you can use). To avoid the overflow, we would need + /// to do something like `bytes.map(|b| b as u32).reduce(|| 0, |a, + /// b| a + b)`, in which case our result would be `256`. + /// + /// In contrast, with `fold()`, the identity function does not + /// have to have the same type as the things you are iterating + /// over, and you potentially get back many results. So, if we + /// continue with the `bytes` example from the previous paragraph, + /// we could do `bytes.fold(|| 0_u32, |a, b| a + (b as u32))` to + /// convert our bytes into `u32`. And of course we might not get + /// back a single sum. + /// + /// There is a more subtle distinction as well, though it's + /// actually implied by the above points. When you use `reduce()`, + /// your reduction function is sometimes called with values that + /// were never part of your original parallel iterator (for + /// example, both the left and right might be a partial sum). With + /// `fold()`, in contrast, the left value in the fold function is + /// always the accumulator, and the right value is always from + /// your original sequence. + /// + /// ### Fold vs Map/Reduce + /// + /// Fold makes sense if you have some operation where it is + /// cheaper to create groups of elements at a time. For example, + /// imagine collecting characters into a string. 
If you were going + /// to use map/reduce, you might try this: + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let s = + /// ['a', 'b', 'c', 'd', 'e'] + /// .par_iter() + /// .map(|c: &char| format!("{}", c)) + /// .reduce(|| String::new(), + /// |mut a: String, b: String| { a.push_str(&b); a }); + /// + /// assert_eq!(s, "abcde"); + /// ``` + /// + /// Because reduce produces the same type of element as its input, + /// you have to first map each character into a string, and then + /// you can reduce them. This means we create one string per + /// element in our iterator -- not so great. Using `fold`, we can + /// do this instead: + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let s = + /// ['a', 'b', 'c', 'd', 'e'] + /// .par_iter() + /// .fold(|| String::new(), + /// |mut s: String, c: &char| { s.push(*c); s }) + /// .reduce(|| String::new(), + /// |mut a: String, b: String| { a.push_str(&b); a }); + /// + /// assert_eq!(s, "abcde"); + /// ``` + /// + /// Now `fold` will process groups of our characters at a time, + /// and we only make one string per group. We should wind up with + /// some small-ish number of strings roughly proportional to the + /// number of CPUs you have (it will ultimately depend on how busy + /// your processors are). Note that we still need to do a reduce + /// afterwards to combine those groups of strings into a single + /// string. + /// + /// You could use a similar trick to save partial results (e.g., a + /// cache) or something similar. + /// + /// ### Combining fold with other operations + /// + /// You can combine `fold` with `reduce` if you want to produce a + /// single value. 
This is then roughly equivalent to a map/reduce + /// combination in effect: + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let bytes = 0..22_u8; + /// let sum = bytes.into_par_iter() + /// .fold(|| 0_u32, |a: u32, b: u8| a + (b as u32)) + /// .sum::(); + /// + /// assert_eq!(sum, (0..22).sum()); // compare to sequential + /// ``` + fn fold(self, identity: ID, fold_op: F) -> Fold + where + F: Fn(T, Self::Item) -> T + Sync + Send, + ID: Fn() -> T + Sync + Send, + T: Send, + { + Fold::new(self, identity, fold_op) + } + + /// Applies `fold_op` to the given `init` value with each item of this + /// iterator, finally producing the value for further use. + /// + /// This works essentially like `fold(|| init.clone(), fold_op)`, except + /// it doesn't require the `init` type to be `Sync`, nor any other form + /// of added synchronization. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let bytes = 0..22_u8; + /// let sum = bytes.into_par_iter() + /// .fold_with(0_u32, |a: u32, b: u8| a + (b as u32)) + /// .sum::(); + /// + /// assert_eq!(sum, (0..22).sum()); // compare to sequential + /// ``` + fn fold_with(self, init: T, fold_op: F) -> FoldWith + where + F: Fn(T, Self::Item) -> T + Sync + Send, + T: Send + Clone, + { + FoldWith::new(self, init, fold_op) + } + + /// Performs a fallible parallel fold. + /// + /// This is a variation of [`fold()`] for operations which can fail with + /// `Option::None` or `Result::Err`. The first such failure stops + /// processing the local set of items, without affecting other folds in the + /// iterator's subdivisions. + /// + /// Often, `try_fold()` will be followed by [`try_reduce()`] + /// for a final reduction and global short-circuiting effect. 
+ /// + /// [`fold()`]: #method.fold + /// [`try_reduce()`]: #method.try_reduce + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let bytes = 0..22_u8; + /// let sum = bytes.into_par_iter() + /// .try_fold(|| 0_u32, |a: u32, b: u8| a.checked_add(b as u32)) + /// .try_reduce(|| 0, u32::checked_add); + /// + /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential + /// ``` + fn try_fold(self, identity: ID, fold_op: F) -> TryFold + where + F: Fn(T, Self::Item) -> R + Sync + Send, + ID: Fn() -> T + Sync + Send, + R: Try + Send, + { + TryFold::new(self, identity, fold_op) + } + + /// Performs a fallible parallel fold with a cloneable `init` value. + /// + /// This combines the `init` semantics of [`fold_with()`] and the failure + /// semantics of [`try_fold()`]. + /// + /// [`fold_with()`]: #method.fold_with + /// [`try_fold()`]: #method.try_fold + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let bytes = 0..22_u8; + /// let sum = bytes.into_par_iter() + /// .try_fold_with(0_u32, |a: u32, b: u8| a.checked_add(b as u32)) + /// .try_reduce(|| 0, u32::checked_add); + /// + /// assert_eq!(sum, Some((0..22).sum())); // compare to sequential + /// ``` + fn try_fold_with(self, init: T, fold_op: F) -> TryFoldWith + where + F: Fn(T, Self::Item) -> R + Sync + Send, + R: Try + Send, + T: Clone + Send, + { + TryFoldWith::new(self, init, fold_op) + } + + /// Sums up the items in the iterator. + /// + /// Note that the order in which items will be reduced is not specified, + /// so if the `+` operator is not truly [associative] \(as is the + /// case for floating point numbers), then the results are not + /// fully deterministic. + /// + /// [associative]: https://en.wikipedia.org/wiki/Associative_property + /// + /// Basically equivalent to `self.reduce(|| 0, |a, b| a + b)`, + /// except that the type of `0` and the `+` operation may vary + /// depending on the type of value being produced. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 5, 7]; + /// + /// let sum: i32 = a.par_iter().sum(); + /// + /// assert_eq!(sum, 13); + /// ``` + fn sum(self) -> S + where + S: Send + Sum + Sum, + { + sum::sum(self) + } + + /// Multiplies all the items in the iterator. + /// + /// Note that the order in which items will be reduced is not specified, + /// so if the `*` operator is not truly [associative] \(as is the + /// case for floating point numbers), then the results are not + /// fully deterministic. + /// + /// [associative]: https://en.wikipedia.org/wiki/Associative_property + /// + /// Basically equivalent to `self.reduce(|| 1, |a, b| a * b)`, + /// except that the type of `1` and the `*` operation may vary + /// depending on the type of value being produced. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// fn factorial(n: u32) -> u32 { + /// (1..n+1).into_par_iter().product() + /// } + /// + /// assert_eq!(factorial(0), 1); + /// assert_eq!(factorial(1), 1); + /// assert_eq!(factorial(5), 120); + /// ``` + fn product

(self) -> P + where + P: Send + Product + Product

, + { + product::product(self) + } + + /// Computes the minimum of all the items in the iterator. If the + /// iterator is empty, `None` is returned; otherwise, `Some(min)` + /// is returned. + /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the `Ord` impl is not truly associative, then + /// the results are not deterministic. + /// + /// Basically equivalent to `self.reduce_with(|a, b| cmp::min(a, b))`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [45, 74, 32]; + /// + /// assert_eq!(a.par_iter().min(), Some(&32)); + /// + /// let b: [i32; 0] = []; + /// + /// assert_eq!(b.par_iter().min(), None); + /// ``` + fn min(self) -> Option + where + Self::Item: Ord, + { + self.reduce_with(cmp::min) + } + + /// Computes the minimum of all the items in the iterator with respect to + /// the given comparison function. If the iterator is empty, `None` is + /// returned; otherwise, `Some(min)` is returned. + /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the comparison function is not associative, then + /// the results are not deterministic. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [-3_i32, 77, 53, 240, -1]; + /// + /// assert_eq!(a.par_iter().min_by(|x, y| x.cmp(y)), Some(&-3)); + /// ``` + fn min_by(self, f: F) -> Option + where + F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering, + { + fn min(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T { + move |a, b| match f(&a, &b) { + Ordering::Greater => b, + _ => a, + } + } + + self.reduce_with(min(f)) + } + + /// Computes the item that yields the minimum value for the given + /// function. If the iterator is empty, `None` is returned; + /// otherwise, `Some(item)` is returned. 
+ /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the `Ord` impl is not truly associative, then + /// the results are not deterministic. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [-3_i32, 34, 2, 5, -10, -3, -23]; + /// + /// assert_eq!(a.par_iter().min_by_key(|x| x.abs()), Some(&2)); + /// ``` + fn min_by_key(self, f: F) -> Option + where + K: Ord + Send, + F: Sync + Send + Fn(&Self::Item) -> K, + { + fn key(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) { + move |x| (f(&x), x) + } + + fn min_key(a: (K, T), b: (K, T)) -> (K, T) { + match (a.0).cmp(&b.0) { + Ordering::Greater => b, + _ => a, + } + } + + let (_, x) = self.map(key(f)).reduce_with(min_key)?; + Some(x) + } + + /// Computes the maximum of all the items in the iterator. If the + /// iterator is empty, `None` is returned; otherwise, `Some(max)` + /// is returned. + /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the `Ord` impl is not truly associative, then + /// the results are not deterministic. + /// + /// Basically equivalent to `self.reduce_with(|a, b| cmp::max(a, b))`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [45, 74, 32]; + /// + /// assert_eq!(a.par_iter().max(), Some(&74)); + /// + /// let b: [i32; 0] = []; + /// + /// assert_eq!(b.par_iter().max(), None); + /// ``` + fn max(self) -> Option + where + Self::Item: Ord, + { + self.reduce_with(cmp::max) + } + + /// Computes the maximum of all the items in the iterator with respect to + /// the given comparison function. If the iterator is empty, `None` is + /// returned; otherwise, `Some(max)` is returned. + /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the comparison function is not associative, then + /// the results are not deterministic. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [-3_i32, 77, 53, 240, -1]; + /// + /// assert_eq!(a.par_iter().max_by(|x, y| x.abs().cmp(&y.abs())), Some(&240)); + /// ``` + fn max_by(self, f: F) -> Option + where + F: Sync + Send + Fn(&Self::Item, &Self::Item) -> Ordering, + { + fn max(f: impl Fn(&T, &T) -> Ordering) -> impl Fn(T, T) -> T { + move |a, b| match f(&a, &b) { + Ordering::Greater => a, + _ => b, + } + } + + self.reduce_with(max(f)) + } + + /// Computes the item that yields the maximum value for the given + /// function. If the iterator is empty, `None` is returned; + /// otherwise, `Some(item)` is returned. + /// + /// Note that the order in which the items will be reduced is not + /// specified, so if the `Ord` impl is not truly associative, then + /// the results are not deterministic. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [-3_i32, 34, 2, 5, -10, -3, -23]; + /// + /// assert_eq!(a.par_iter().max_by_key(|x| x.abs()), Some(&34)); + /// ``` + fn max_by_key(self, f: F) -> Option + where + K: Ord + Send, + F: Sync + Send + Fn(&Self::Item) -> K, + { + fn key(f: impl Fn(&T) -> K) -> impl Fn(T) -> (K, T) { + move |x| (f(&x), x) + } + + fn max_key(a: (K, T), b: (K, T)) -> (K, T) { + match (a.0).cmp(&b.0) { + Ordering::Greater => a, + _ => b, + } + } + + let (_, x) = self.map(key(f)).reduce_with(max_key)?; + Some(x) + } + + /// Takes two iterators and creates a new iterator over both. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [0, 1, 2]; + /// let b = [9, 8, 7]; + /// + /// let par_iter = a.par_iter().chain(b.par_iter()); + /// + /// let chained: Vec<_> = par_iter.cloned().collect(); + /// + /// assert_eq!(&chained[..], &[0, 1, 2, 9, 8, 7]); + /// ``` + fn chain(self, chain: C) -> Chain + where + C: IntoParallelIterator, + { + Chain::new(self, chain.into_par_iter()) + } + + /// Searches for **some** item in the parallel iterator that + /// matches the given predicate and returns it. This operation + /// is similar to [`find` on sequential iterators][find] but + /// the item returned may not be the **first** one in the parallel + /// sequence which matches, since we search the entire sequence in parallel. + /// + /// Once a match is found, we will attempt to stop processing + /// the rest of the items in the iterator as soon as possible + /// (just as `find` stops iterating once a match is found). + /// + /// [find]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.find + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// assert_eq!(a.par_iter().find_any(|&&x| x == 3), Some(&3)); + /// + /// assert_eq!(a.par_iter().find_any(|&&x| x == 100), None); + /// ``` + fn find_any

(self, predicate: P) -> Option + where + P: Fn(&Self::Item) -> bool + Sync + Send, + { + find::find(self, predicate) + } + + /// Searches for the sequentially **first** item in the parallel iterator + /// that matches the given predicate and returns it. + /// + /// Once a match is found, all attempts to the right of the match + /// will be stopped, while attempts to the left must continue in case + /// an earlier match is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "first" may be nebulous. If you + /// just want the first match that is discovered anywhere in the iterator, + /// `find_any` is a better choice. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// assert_eq!(a.par_iter().find_first(|&&x| x == 3), Some(&3)); + /// + /// assert_eq!(a.par_iter().find_first(|&&x| x == 100), None); + /// ``` + fn find_first

(self, predicate: P) -> Option + where + P: Fn(&Self::Item) -> bool + Sync + Send, + { + find_first_last::find_first(self, predicate) + } + + /// Searches for the sequentially **last** item in the parallel iterator + /// that matches the given predicate and returns it. + /// + /// Once a match is found, all attempts to the left of the match + /// will be stopped, while attempts to the right must continue in case + /// a later match is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "last" may be nebulous. When the + /// order doesn't actually matter to you, `find_any` is a better choice. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// assert_eq!(a.par_iter().find_last(|&&x| x == 3), Some(&3)); + /// + /// assert_eq!(a.par_iter().find_last(|&&x| x == 100), None); + /// ``` + fn find_last

(self, predicate: P) -> Option + where + P: Fn(&Self::Item) -> bool + Sync + Send, + { + find_first_last::find_last(self, predicate) + } + + /// Applies the given predicate to the items in the parallel iterator + /// and returns **any** non-None result of the map operation. + /// + /// Once a non-None value is produced from the map operation, we will + /// attempt to stop processing the rest of the items in the iterator + /// as soon as possible. + /// + /// Note that this method only returns **some** item in the parallel + /// iterator that is not None from the map predicate. The item returned + /// may not be the **first** non-None value produced in the parallel + /// sequence, since the entire sequence is mapped over in parallel. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let c = ["lol", "NaN", "5", "5"]; + /// + /// let found_number = c.par_iter().find_map_any(|s| s.parse().ok()); + /// + /// assert_eq!(found_number, Some(5)); + /// ``` + fn find_map_any(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> Option + Sync + Send, + R: Send, + { + fn yes(_: &T) -> bool { + true + } + self.filter_map(predicate).find_any(yes) + } + + /// Applies the given predicate to the items in the parallel iterator and + /// returns the sequentially **first** non-None result of the map operation. + /// + /// Once a non-None value is produced from the map operation, all attempts + /// to the right of the match will be stopped, while attempts to the left + /// must continue in case an earlier match is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "first" may be nebulous. If you + /// just want the first non-None value discovered anywhere in the iterator, + /// `find_map_any` is a better choice. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let c = ["lol", "NaN", "2", "5"]; + /// + /// let first_number = c.par_iter().find_map_first(|s| s.parse().ok()); + /// + /// assert_eq!(first_number, Some(2)); + /// ``` + fn find_map_first(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> Option + Sync + Send, + R: Send, + { + fn yes(_: &T) -> bool { + true + } + self.filter_map(predicate).find_first(yes) + } + + /// Applies the given predicate to the items in the parallel iterator and + /// returns the sequentially **last** non-None result of the map operation. + /// + /// Once a non-None value is produced from the map operation, all attempts + /// to the left of the match will be stopped, while attempts to the right + /// must continue in case a later match is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "last" may be nebulous. When the + /// order doesn't actually matter to you, `find_map_any` is a better + /// choice. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let c = ["lol", "NaN", "2", "5"]; + /// + /// let last_number = c.par_iter().find_map_last(|s| s.parse().ok()); + /// + /// assert_eq!(last_number, Some(5)); + /// ``` + fn find_map_last(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> Option + Sync + Send, + R: Send, + { + fn yes(_: &T) -> bool { + true + } + self.filter_map(predicate).find_last(yes) + } + + #[doc(hidden)] + #[deprecated(note = "parallel `find` does not search in order -- use `find_any`, \\ + `find_first`, or `find_last`")] + fn find

(self, predicate: P) -> Option + where + P: Fn(&Self::Item) -> bool + Sync + Send, + { + self.find_any(predicate) + } + + /// Searches for **some** item in the parallel iterator that + /// matches the given predicate, and if so returns true. Once + /// a match is found, we'll attempt to stop processing the rest + /// of the items. Proving that there's no match, returning false, + /// does require visiting every item. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [0, 12, 3, 4, 0, 23, 0]; + /// + /// let is_valid = a.par_iter().any(|&x| x > 10); + /// + /// assert!(is_valid); + /// ``` + fn any

(self, predicate: P) -> bool + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + self.map(predicate).find_any(bool::clone).is_some() + } + + /// Tests that every item in the parallel iterator matches the given + /// predicate, and if so returns true. If a counter-example is found, + /// we'll attempt to stop processing more items, then return false. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [0, 12, 3, 4, 0, 23, 0]; + /// + /// let is_valid = a.par_iter().all(|&x| x > 10); + /// + /// assert!(!is_valid); + /// ``` + fn all

(self, predicate: P) -> bool + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + #[inline] + fn is_false(x: &bool) -> bool { + !x + } + + self.map(predicate).find_any(is_false).is_none() + } + + /// Creates an iterator over the `Some` items of this iterator, halting + /// as soon as any `None` is found. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::sync::atomic::{AtomicUsize, Ordering}; + /// + /// let counter = AtomicUsize::new(0); + /// let value = (0_i32..2048) + /// .into_par_iter() + /// .map(|x| { + /// counter.fetch_add(1, Ordering::SeqCst); + /// if x < 1024 { Some(x) } else { None } + /// }) + /// .while_some() + /// .max(); + /// + /// assert!(value < Some(1024)); + /// assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one + /// ``` + fn while_some(self) -> WhileSome + where + Self: ParallelIterator>, + T: Send, + { + WhileSome::new(self) + } + + /// Wraps an iterator with a fuse in case of panics, to halt all threads + /// as soon as possible. + /// + /// Panics within parallel iterators are always propagated to the caller, + /// but they don't always halt the rest of the iterator right away, due to + /// the internal semantics of [`join`]. This adaptor makes a greater effort + /// to stop processing other items sooner, with the cost of additional + /// synchronization overhead, which may also inhibit some optimizations. + /// + /// [`join`]: ../fn.join.html#panics + /// + /// # Examples + /// + /// If this code didn't use `panic_fuse()`, it would continue processing + /// many more items in other threads (with long sleep delays) before the + /// panic is finally propagated. + /// + /// ```should_panic + /// use rayon::prelude::*; + /// use std::{thread, time}; + /// + /// (0..1_000_000) + /// .into_par_iter() + /// .panic_fuse() + /// .for_each(|i| { + /// // simulate some work + /// thread::sleep(time::Duration::from_secs(1)); + /// assert!(i > 0); // oops! 
+ /// }); + /// ``` + fn panic_fuse(self) -> PanicFuse { + PanicFuse::new(self) + } + + /// Creates a fresh collection containing all the elements produced + /// by this parallel iterator. + /// + /// You may prefer [`collect_into_vec()`] implemented on + /// [`IndexedParallelIterator`], if your underlying iterator also implements + /// it. [`collect_into_vec()`] allocates efficiently with precise knowledge + /// of how many elements the iterator contains, and even allows you to reuse + /// an existing vector's backing store rather than allocating a fresh vector. + /// + /// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html + /// [`collect_into_vec()`]: + /// trait.IndexedParallelIterator.html#method.collect_into_vec + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let sync_vec: Vec<_> = (0..100).into_iter().collect(); + /// + /// let async_vec: Vec<_> = (0..100).into_par_iter().collect(); + /// + /// assert_eq!(sync_vec, async_vec); + /// ``` + fn collect(self) -> C + where + C: FromParallelIterator, + { + C::from_par_iter(self) + } + + /// Unzips the items of a parallel iterator into a pair of arbitrary + /// `ParallelExtend` containers. + /// + /// You may prefer to use `unzip_into_vecs()`, which allocates more + /// efficiently with precise knowledge of how many elements the + /// iterator contains, and even allows you to reuse existing + /// vectors' backing stores rather than allocating fresh vectors. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [(0, 1), (1, 2), (2, 3), (3, 4)]; + /// + /// let (left, right): (Vec<_>, Vec<_>) = a.par_iter().cloned().unzip(); + /// + /// assert_eq!(left, [0, 1, 2, 3]); + /// assert_eq!(right, [1, 2, 3, 4]); + /// ``` + /// + /// Nested pairs can be unzipped too. 
+ /// + /// ``` + /// use rayon::prelude::*; + /// + /// let (values, (squares, cubes)): (Vec<_>, (Vec<_>, Vec<_>)) = (0..4).into_par_iter() + /// .map(|i| (i, (i * i, i * i * i))) + /// .unzip(); + /// + /// assert_eq!(values, [0, 1, 2, 3]); + /// assert_eq!(squares, [0, 1, 4, 9]); + /// assert_eq!(cubes, [0, 1, 8, 27]); + /// ``` + fn unzip(self) -> (FromA, FromB) + where + Self: ParallelIterator, + FromA: Default + Send + ParallelExtend, + FromB: Default + Send + ParallelExtend, + A: Send, + B: Send, + { + unzip::unzip(self) + } + + /// Partitions the items of a parallel iterator into a pair of arbitrary + /// `ParallelExtend` containers. Items for which the `predicate` returns + /// true go into the first container, and the rest go into the second. + /// + /// Note: unlike the standard `Iterator::partition`, this allows distinct + /// collection types for the left and right items. This is more flexible, + /// but may require new type annotations when converting sequential code + /// that used type inference assuming the two were the same. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter().partition(|x| x % 2 == 0); + /// + /// assert_eq!(left, [0, 2, 4, 6]); + /// assert_eq!(right, [1, 3, 5, 7]); + /// ``` + fn partition(self, predicate: P) -> (A, B) + where + A: Default + Send + ParallelExtend, + B: Default + Send + ParallelExtend, + P: Fn(&Self::Item) -> bool + Sync + Send, + { + unzip::partition(self, predicate) + } + + /// Partitions and maps the items of a parallel iterator into a pair of + /// arbitrary `ParallelExtend` containers. `Either::Left` items go into + /// the first container, and `Either::Right` items go into the second. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use rayon::iter::Either; + /// + /// let (left, right): (Vec<_>, Vec<_>) = (0..8).into_par_iter() + /// .partition_map(|x| { + /// if x % 2 == 0 { + /// Either::Left(x * 4) + /// } else { + /// Either::Right(x * 3) + /// } + /// }); + /// + /// assert_eq!(left, [0, 8, 16, 24]); + /// assert_eq!(right, [3, 9, 15, 21]); + /// ``` + /// + /// Nested `Either` enums can be split as well. + /// + /// ``` + /// use rayon::prelude::*; + /// use rayon::iter::Either::*; + /// + /// let ((fizzbuzz, fizz), (buzz, other)): ((Vec<_>, Vec<_>), (Vec<_>, Vec<_>)) = (1..20) + /// .into_par_iter() + /// .partition_map(|x| match (x % 3, x % 5) { + /// (0, 0) => Left(Left(x)), + /// (0, _) => Left(Right(x)), + /// (_, 0) => Right(Left(x)), + /// (_, _) => Right(Right(x)), + /// }); + /// + /// assert_eq!(fizzbuzz, [15]); + /// assert_eq!(fizz, [3, 6, 9, 12, 18]); + /// assert_eq!(buzz, [5, 10]); + /// assert_eq!(other, [1, 2, 4, 7, 8, 11, 13, 14, 16, 17, 19]); + /// ``` + fn partition_map(self, predicate: P) -> (A, B) + where + A: Default + Send + ParallelExtend, + B: Default + Send + ParallelExtend, + P: Fn(Self::Item) -> Either + Sync + Send, + L: Send, + R: Send, + { + unzip::partition_map(self, predicate) + } + + /// Intersperses clones of an element between items of this iterator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let x = vec![1, 2, 3]; + /// let r: Vec<_> = x.into_par_iter().intersperse(-1).collect(); + /// + /// assert_eq!(r, vec![1, -1, 2, -1, 3]); + /// ``` + fn intersperse(self, element: Self::Item) -> Intersperse + where + Self::Item: Clone, + { + Intersperse::new(self, element) + } + + /// Internal method used to define the behavior of this parallel + /// iterator. You should not need to call this directly. + /// + /// This method causes the iterator `self` to start producing + /// items and to feed them to the consumer `consumer` one by one. 
+ /// It may split the consumer before doing so to create the + /// opportunity to produce in parallel. + /// + /// See the [README] for more details on the internals of parallel + /// iterators. + /// + /// [README]: README.md + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer; + + /// Internal method used to define the behavior of this parallel + /// iterator. You should not need to call this directly. + /// + /// Returns the number of items produced by this iterator, if known + /// statically. This can be used by consumers to trigger special fast + /// paths. Therefore, if `Some(_)` is returned, this iterator must only + /// use the (indexed) `Consumer` methods when driving a consumer, such + /// as `split_at()`. Calling `UnindexedConsumer::split_off_left()` or + /// other `UnindexedConsumer` methods -- or returning an inaccurate + /// value -- may result in panics. + /// + /// This method is currently used to optimize `collect` for want + /// of true Rust specialization; it may be removed when + /// specialization is stable. + fn opt_len(&self) -> Option { + None + } +} + +impl IntoParallelIterator for T { + type Iter = T; + type Item = T::Item; + + fn into_par_iter(self) -> T { + self + } +} + +/// An iterator that supports "random access" to its data, meaning +/// that you can split it at arbitrary indices and draw data from +/// those points. +/// +/// **Note:** Not implemented for `u64`, `i64`, `u128`, or `i128` ranges +pub trait IndexedParallelIterator: ParallelIterator { + /// Collects the results of the iterator into the specified + /// vector. The vector is always truncated before execution + /// begins. If possible, reusing the vector across calls can lead + /// to better performance since it reuses the same backing buffer. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// // any prior data will be truncated + /// let mut vec = vec![-1, -2, -3]; + /// + /// (0..5).into_par_iter() + /// .collect_into_vec(&mut vec); + /// + /// assert_eq!(vec, [0, 1, 2, 3, 4]); + /// ``` + fn collect_into_vec(self, target: &mut Vec) { + collect::collect_into_vec(self, target); + } + + /// Unzips the results of the iterator into the specified + /// vectors. The vectors are always truncated before execution + /// begins. If possible, reusing the vectors across calls can lead + /// to better performance since they reuse the same backing buffer. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// // any prior data will be truncated + /// let mut left = vec![42; 10]; + /// let mut right = vec![-1; 10]; + /// + /// (10..15).into_par_iter() + /// .enumerate() + /// .unzip_into_vecs(&mut left, &mut right); + /// + /// assert_eq!(left, [0, 1, 2, 3, 4]); + /// assert_eq!(right, [10, 11, 12, 13, 14]); + /// ``` + fn unzip_into_vecs(self, left: &mut Vec, right: &mut Vec) + where + Self: IndexedParallelIterator, + A: Send, + B: Send, + { + collect::unzip_into_vecs(self, left, right); + } + + /// Iterates over tuples `(A, B)`, where the items `A` are from + /// this iterator and `B` are from the iterator given as argument. + /// Like the `zip` method on ordinary iterators, if the two + /// iterators are of unequal length, you only get the items they + /// have in common. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let result: Vec<_> = (1..4) + /// .into_par_iter() + /// .zip(vec!['a', 'b', 'c']) + /// .collect(); + /// + /// assert_eq!(result, [(1, 'a'), (2, 'b'), (3, 'c')]); + /// ``` + fn zip(self, zip_op: Z) -> Zip + where + Z: IntoParallelIterator, + Z::Iter: IndexedParallelIterator, + { + Zip::new(self, zip_op.into_par_iter()) + } + + /// The same as `Zip`, but requires that both iterators have the same length. 
+ /// + /// # Panics + /// Will panic if `self` and `zip_op` are not the same length. + /// + /// ```should_panic + /// use rayon::prelude::*; + /// + /// let one = [1u8]; + /// let two = [2u8, 2]; + /// let one_iter = one.par_iter(); + /// let two_iter = two.par_iter(); + /// + /// // this will panic + /// let zipped: Vec<(&u8, &u8)> = one_iter.zip_eq(two_iter).collect(); + /// + /// // we should never get here + /// assert_eq!(1, zipped.len()); + /// ``` + fn zip_eq(self, zip_op: Z) -> ZipEq + where + Z: IntoParallelIterator, + Z::Iter: IndexedParallelIterator, + { + let zip_op_iter = zip_op.into_par_iter(); + assert_eq!(self.len(), zip_op_iter.len()); + ZipEq::new(self, zip_op_iter) + } + + /// Interleaves elements of this iterator and the other given + /// iterator. Alternately yields elements from this iterator and + /// the given iterator, until both are exhausted. If one iterator + /// is exhausted before the other, the last elements are provided + /// from the other. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let (x, y) = (vec![1, 2], vec![3, 4, 5, 6]); + /// let r: Vec = x.into_par_iter().interleave(y).collect(); + /// assert_eq!(r, vec![1, 3, 2, 4, 5, 6]); + /// ``` + fn interleave(self, other: I) -> Interleave + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + { + Interleave::new(self, other.into_par_iter()) + } + + /// Interleaves elements of this iterator and the other given + /// iterator, until one is exhausted. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let (x, y) = (vec![1, 2, 3, 4], vec![5, 6]); + /// let r: Vec = x.into_par_iter().interleave_shortest(y).collect(); + /// assert_eq!(r, vec![1, 5, 2, 6, 3]); + /// ``` + fn interleave_shortest(self, other: I) -> InterleaveShortest + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + { + InterleaveShortest::new(self, other.into_par_iter()) + } + + /// Splits an iterator up into fixed-size chunks. 
+ /// + /// Returns an iterator that returns `Vec`s of the given number of elements. + /// If the number of elements in the iterator is not divisible by `chunk_size`, + /// the last chunk may be shorter than `chunk_size`. + /// + /// See also [`par_chunks()`] and [`par_chunks_mut()`] for similar behavior on + /// slices, without having to allocate intermediate `Vec`s for the chunks. + /// + /// [`par_chunks()`]: ../slice/trait.ParallelSlice.html#method.par_chunks + /// [`par_chunks_mut()`]: ../slice/trait.ParallelSliceMut.html#method.par_chunks_mut + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let a = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + /// let r: Vec> = a.into_par_iter().chunks(3).collect(); + /// assert_eq!(r, vec![vec![1,2,3], vec![4,5,6], vec![7,8,9], vec![10]]); + /// ``` + fn chunks(self, chunk_size: usize) -> Chunks { + assert!(chunk_size != 0, "chunk_size must not be zero"); + Chunks::new(self, chunk_size) + } + + /// Lexicographically compares the elements of this `ParallelIterator` with those of + /// another. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::cmp::Ordering::*; + /// + /// let x = vec![1, 2, 3]; + /// assert_eq!(x.par_iter().cmp(&vec![1, 3, 0]), Less); + /// assert_eq!(x.par_iter().cmp(&vec![1, 2, 3]), Equal); + /// assert_eq!(x.par_iter().cmp(&vec![1, 2]), Greater); + /// ``` + fn cmp(self, other: I) -> Ordering + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: Ord, + { + #[inline] + fn ordering((x, y): (T, T)) -> Ordering { + Ord::cmp(&x, &y) + } + + #[inline] + fn inequal(&ord: &Ordering) -> bool { + ord != Ordering::Equal + } + + let other = other.into_par_iter(); + let ord_len = self.len().cmp(&other.len()); + self.zip(other) + .map(ordering) + .find_first(inequal) + .unwrap_or(ord_len) + } + + /// Lexicographically compares the elements of this `ParallelIterator` with those of + /// another. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::cmp::Ordering::*; + /// use std::f64::NAN; + /// + /// let x = vec![1.0, 2.0, 3.0]; + /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 3.0, 0.0]), Some(Less)); + /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0, 3.0]), Some(Equal)); + /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, 2.0]), Some(Greater)); + /// assert_eq!(x.par_iter().partial_cmp(&vec![1.0, NAN]), None); + /// ``` + fn partial_cmp(self, other: I) -> Option + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialOrd, + { + #[inline] + fn ordering, U>((x, y): (T, U)) -> Option { + PartialOrd::partial_cmp(&x, &y) + } + + #[inline] + fn inequal(&ord: &Option) -> bool { + ord != Some(Ordering::Equal) + } + + let other = other.into_par_iter(); + let ord_len = self.len().cmp(&other.len()); + self.zip(other) + .map(ordering) + .find_first(inequal) + .unwrap_or(Some(ord_len)) + } + + /// Determines if the elements of this `ParallelIterator` + /// are equal to those of another + fn eq(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialEq, + { + #[inline] + fn eq, U>((x, y): (T, U)) -> bool { + PartialEq::eq(&x, &y) + } + + let other = other.into_par_iter(); + self.len() == other.len() && self.zip(other).all(eq) + } + + /// Determines if the elements of this `ParallelIterator` + /// are unequal to those of another + fn ne(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialEq, + { + !self.eq(other) + } + + /// Determines if the elements of this `ParallelIterator` + /// are lexicographically less than those of another. 
+ fn lt(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialOrd, + { + self.partial_cmp(other) == Some(Ordering::Less) + } + + /// Determines if the elements of this `ParallelIterator` + /// are lexicographically less than or equal to those of another. + fn le(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialOrd, + { + let ord = self.partial_cmp(other); + ord == Some(Ordering::Equal) || ord == Some(Ordering::Less) + } + + /// Determines if the elements of this `ParallelIterator` + /// are lexicographically greater than those of another. + fn gt(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialOrd, + { + self.partial_cmp(other) == Some(Ordering::Greater) + } + + /// Determines if the elements of this `ParallelIterator` + /// are lexicographically greater than or equal to those of another. + fn ge(self, other: I) -> bool + where + I: IntoParallelIterator, + I::Iter: IndexedParallelIterator, + Self::Item: PartialOrd, + { + let ord = self.partial_cmp(other); + ord == Some(Ordering::Equal) || ord == Some(Ordering::Greater) + } + + /// Yields an index along with each item. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let chars = vec!['a', 'b', 'c']; + /// let result: Vec<_> = chars + /// .into_par_iter() + /// .enumerate() + /// .collect(); + /// + /// assert_eq!(result, [(0, 'a'), (1, 'b'), (2, 'c')]); + /// ``` + fn enumerate(self) -> Enumerate { + Enumerate::new(self) + } + + /// Creates an iterator that steps by the given amount. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let range = (3..10); + /// let result: Vec = range + /// .into_par_iter() + /// .step_by(3) + /// .collect(); + /// + /// assert_eq!(result, [3, 6, 9]) + /// ``` + /// + /// # Compatibility + /// + /// This method is only available on Rust 1.38 or greater. 
+ #[cfg(step_by)] + fn step_by(self, step: usize) -> StepBy { + StepBy::new(self, step) + } + + /// Creates an iterator that skips the first `n` elements. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let result: Vec<_> = (0..100) + /// .into_par_iter() + /// .skip(95) + /// .collect(); + /// + /// assert_eq!(result, [95, 96, 97, 98, 99]); + /// ``` + fn skip(self, n: usize) -> Skip { + Skip::new(self, n) + } + + /// Creates an iterator that yields the first `n` elements. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let result: Vec<_> = (0..100) + /// .into_par_iter() + /// .take(5) + /// .collect(); + /// + /// assert_eq!(result, [0, 1, 2, 3, 4]); + /// ``` + fn take(self, n: usize) -> Take { + Take::new(self, n) + } + + /// Searches for **some** item in the parallel iterator that + /// matches the given predicate, and returns its index. Like + /// `ParallelIterator::find_any`, the parallel search will not + /// necessarily find the **first** match, and once a match is + /// found we'll attempt to stop processing any more. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// let i = a.par_iter().position_any(|&x| x == 3).expect("found"); + /// assert!(i == 2 || i == 3); + /// + /// assert_eq!(a.par_iter().position_any(|&x| x == 100), None); + /// ``` + fn position_any

(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + #[inline] + fn check(&(_, p): &(usize, bool)) -> bool { + p + } + + let (i, _) = self.map(predicate).enumerate().find_any(check)?; + Some(i) + } + + /// Searches for the sequentially **first** item in the parallel iterator + /// that matches the given predicate, and returns its index. + /// + /// Like `ParallelIterator::find_first`, once a match is found, + /// all attempts to the right of the match will be stopped, while + /// attempts to the left must continue in case an earlier match + /// is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "first" may be nebulous. If you + /// just want the first match that is discovered anywhere in the iterator, + /// `position_any` is a better choice. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// assert_eq!(a.par_iter().position_first(|&x| x == 3), Some(2)); + /// + /// assert_eq!(a.par_iter().position_first(|&x| x == 100), None); + /// ``` + fn position_first

(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + #[inline] + fn check(&(_, p): &(usize, bool)) -> bool { + p + } + + let (i, _) = self.map(predicate).enumerate().find_first(check)?; + Some(i) + } + + /// Searches for the sequentially **last** item in the parallel iterator + /// that matches the given predicate, and returns its index. + /// + /// Like `ParallelIterator::find_last`, once a match is found, + /// all attempts to the left of the match will be stopped, while + /// attempts to the right must continue in case a later match + /// is found. + /// + /// Note that not all parallel iterators have a useful order, much like + /// sequential `HashMap` iteration, so "last" may be nebulous. When the + /// order doesn't actually matter to you, `position_any` is a better + /// choice. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let a = [1, 2, 3, 3]; + /// + /// assert_eq!(a.par_iter().position_last(|&x| x == 3), Some(3)); + /// + /// assert_eq!(a.par_iter().position_last(|&x| x == 100), None); + /// ``` + fn position_last

(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + #[inline] + fn check(&(_, p): &(usize, bool)) -> bool { + p + } + + let (i, _) = self.map(predicate).enumerate().find_last(check)?; + Some(i) + } + + #[doc(hidden)] + #[deprecated( + note = "parallel `position` does not search in order -- use `position_any`, \\ + `position_first`, or `position_last`" + )] + fn position

(self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + self.position_any(predicate) + } + + /// Searches for items in the parallel iterator that match the given + /// predicate, and returns their indices. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let primes = vec![2, 3, 5, 7, 11, 13, 17, 19, 23, 29]; + /// + /// // Find the positions of primes congruent to 1 modulo 6 + /// let p1mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 1).collect(); + /// assert_eq!(p1mod6, [3, 5, 7]); // primes 7, 13, and 19 + /// + /// // Find the positions of primes congruent to 5 modulo 6 + /// let p5mod6: Vec<_> = primes.par_iter().positions(|&p| p % 6 == 5).collect(); + /// assert_eq!(p5mod6, [2, 4, 6, 8, 9]); // primes 5, 11, 17, 23, and 29 + /// ``` + fn positions

(self, predicate: P) -> Positions + where + P: Fn(Self::Item) -> bool + Sync + Send, + { + Positions::new(self, predicate) + } + + /// Produces a new iterator with the elements of this iterator in + /// reverse order. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let result: Vec<_> = (0..5) + /// .into_par_iter() + /// .rev() + /// .collect(); + /// + /// assert_eq!(result, [4, 3, 2, 1, 0]); + /// ``` + fn rev(self) -> Rev { + Rev::new(self) + } + + /// Sets the minimum length of iterators desired to process in each + /// thread. Rayon will not split any smaller than this length, but + /// of course an iterator could already be smaller to begin with. + /// + /// Producers like `zip` and `interleave` will use the greater of the two + /// minimums. + /// Chained iterators and iterators inside `flat_map` may each use + /// their own minimum length. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let min = (0..1_000_000) + /// .into_par_iter() + /// .with_min_len(1234) + /// .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment + /// .min().unwrap(); + /// + /// assert!(min >= 1234); + /// ``` + fn with_min_len(self, min: usize) -> MinLen { + MinLen::new(self, min) + } + + /// Sets the maximum length of iterators desired to process in each + /// thread. Rayon will try to split at least below this length, + /// unless that would put it below the length from `with_min_len()`. + /// For example, given min=10 and max=15, a length of 16 will not be + /// split any further. + /// + /// Producers like `zip` and `interleave` will use the lesser of the two + /// maximums. + /// Chained iterators and iterators inside `flat_map` may each use + /// their own maximum length. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let max = (0..1_000_000) + /// .into_par_iter() + /// .with_max_len(1234) + /// .fold(|| 0, |acc, _| acc + 1) // count how many are in this segment + /// .max().unwrap(); + /// + /// assert!(max <= 1234); + /// ``` + fn with_max_len(self, max: usize) -> MaxLen { + MaxLen::new(self, max) + } + + /// Produces an exact count of how many items this iterator will + /// produce, presuming no panic occurs. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let par_iter = (0..100).into_par_iter().zip(vec![0; 10]); + /// assert_eq!(par_iter.len(), 10); + /// + /// let vec: Vec<_> = par_iter.collect(); + /// assert_eq!(vec.len(), 10); + /// ``` + fn len(&self) -> usize; + + /// Internal method used to define the behavior of this parallel + /// iterator. You should not need to call this directly. + /// + /// This method causes the iterator `self` to start producing + /// items and to feed them to the consumer `consumer` one by one. + /// It may split the consumer before doing so to create the + /// opportunity to produce in parallel. If a split does happen, it + /// will inform the consumer of the index where the split should + /// occur (unlike `ParallelIterator::drive_unindexed()`). + /// + /// See the [README] for more details on the internals of parallel + /// iterators. + /// + /// [README]: README.md + fn drive>(self, consumer: C) -> C::Result; + + /// Internal method used to define the behavior of this parallel + /// iterator. You should not need to call this directly. + /// + /// This method converts the iterator into a producer P and then + /// invokes `callback.callback()` with P. Note that the type of + /// this producer is not defined as part of the API, since + /// `callback` must be defined generically for all producers. 
This + /// allows the producer type to contain references; it also means + /// that parallel iterators can adjust that type without causing a + /// breaking change. + /// + /// See the [README] for more details on the internals of parallel + /// iterators. + /// + /// [README]: README.md + fn with_producer>(self, callback: CB) -> CB::Output; +} + +/// `FromParallelIterator` implements the creation of a collection +/// from a [`ParallelIterator`]. By implementing +/// `FromParallelIterator` for a given type, you define how it will be +/// created from an iterator. +/// +/// `FromParallelIterator` is used through [`ParallelIterator`]'s [`collect()`] method. +/// +/// [`ParallelIterator`]: trait.ParallelIterator.html +/// [`collect()`]: trait.ParallelIterator.html#method.collect +/// +/// # Examples +/// +/// Implementing `FromParallelIterator` for your type: +/// +/// ``` +/// use rayon::prelude::*; +/// use std::mem; +/// +/// struct BlackHole { +/// mass: usize, +/// } +/// +/// impl<T: Send> FromParallelIterator<T> for BlackHole { +/// fn from_par_iter<I>(par_iter: I) -> Self +/// where I: IntoParallelIterator<Item = T> +/// { +/// let par_iter = par_iter.into_par_iter(); +/// BlackHole { +/// mass: par_iter.count() * mem::size_of::<T>(), +/// } +/// } +/// } +/// +/// let bh: BlackHole = (0i32..1000).into_par_iter().collect(); +/// assert_eq!(bh.mass, 4000); +/// ``` +pub trait FromParallelIterator +where + T: Send, +{ + /// Creates an instance of the collection from the parallel iterator `par_iter`. + /// + /// If your collection is not naturally parallel, the easiest (and + /// fastest) way to do this is often to collect `par_iter` into a + /// [`LinkedList`] or other intermediate data structure and then + /// sequentially extend your collection. However, a more 'native' + /// technique is to use the [`par_iter.fold`] or + /// [`par_iter.fold_with`] methods to create the collection. 
+ /// Alternatively, if your collection is 'natively' parallel, you + /// can use [`par_iter.for_each`] to process each element in turn. + /// + /// [`LinkedList`]: https://doc.rust-lang.org/std/collections/struct.LinkedList.html + /// [`par_iter.fold`]: trait.ParallelIterator.html#method.fold + /// [`par_iter.fold_with`]: trait.ParallelIterator.html#method.fold_with + /// [`par_iter.for_each`]: trait.ParallelIterator.html#method.for_each + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator; +} + +/// `ParallelExtend` extends an existing collection with items from a [`ParallelIterator`]. +/// +/// [`ParallelIterator`]: trait.ParallelIterator.html +/// +/// # Examples +/// +/// Implementing `ParallelExtend` for your type: +/// +/// ``` +/// use rayon::prelude::*; +/// use std::mem; +/// +/// struct BlackHole { +/// mass: usize, +/// } +/// +/// impl<T: Send> ParallelExtend<T> for BlackHole { +/// fn par_extend<I>(&mut self, par_iter: I) +/// where I: IntoParallelIterator<Item = T> +/// { +/// let par_iter = par_iter.into_par_iter(); +/// self.mass += par_iter.count() * mem::size_of::<T>(); +/// } +/// } +/// +/// let mut bh = BlackHole { mass: 0 }; +/// bh.par_extend(0i32..1000); +/// assert_eq!(bh.mass, 4000); +/// bh.par_extend(0i64..10); +/// assert_eq!(bh.mass, 4080); +/// ``` +pub trait ParallelExtend +where + T: Send, +{ + /// Extends an instance of the collection with the elements drawn + /// from the parallel iterator `par_iter`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut vec = vec![]; + /// vec.par_extend(0..5); + /// vec.par_extend((0..5).into_par_iter().map(|i| i * i)); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 0, 1, 4, 9, 16]); + /// ``` + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator; +} + +/// `ParallelDrainFull` creates a parallel iterator that moves all items +/// from a collection while retaining the original capacity. 
+/// +/// Types which are indexable typically implement [`ParallelDrainRange`] +/// instead, where you can drain fully with `par_drain(..)`. +/// +/// [`ParallelDrainRange`]: trait.ParallelDrainRange.html +pub trait ParallelDrainFull { + /// The draining parallel iterator type that will be created. + type Iter: ParallelIterator; + + /// The type of item that the parallel iterator will produce. + /// This is usually the same as `IntoParallelIterator::Item`. + type Item: Send; + + /// Returns a draining parallel iterator over an entire collection. + /// + /// When the iterator is dropped, all items are removed, even if the + /// iterator was not fully consumed. If the iterator is leaked, for example + /// using `std::mem::forget`, it is unspecified how many items are removed. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// use std::collections::{BinaryHeap, HashSet}; + /// + /// let squares: HashSet<i32> = (0..10).map(|x| x * x).collect(); + /// + /// let mut heap: BinaryHeap<_> = squares.iter().copied().collect(); + /// assert_eq!( + /// // heaps are drained in arbitrary order + /// heap.par_drain() + /// .inspect(|x| assert!(squares.contains(x))) + /// .count(), + /// squares.len(), + /// ); + /// assert!(heap.is_empty()); + /// assert!(heap.capacity() >= squares.len()); + /// ``` + fn par_drain(self) -> Self::Iter; +} + +/// `ParallelDrainRange` creates a parallel iterator that moves a range of items +/// from a collection while retaining the original capacity. +/// +/// Types which are not indexable may implement [`ParallelDrainFull`] instead. +/// +/// [`ParallelDrainFull`]: trait.ParallelDrainFull.html +pub trait ParallelDrainRange { + /// The draining parallel iterator type that will be created. + type Iter: ParallelIterator; + + /// The type of item that the parallel iterator will produce. + /// This is usually the same as `IntoParallelIterator::Item`. 
+ type Item: Send; + + /// Returns a draining parallel iterator over a range of the collection. + /// + /// When the iterator is dropped, all items in the range are removed, even + /// if the iterator was not fully consumed. If the iterator is leaked, for + /// example using `std::mem::forget`, it is unspecified how many items are + /// removed. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let squares: Vec<i32> = (0..10).map(|x| x * x).collect(); + /// + /// println!("RangeFull"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(..) + /// .eq(squares.par_iter().copied())); + /// assert!(vec.is_empty()); + /// assert!(vec.capacity() >= squares.len()); + /// + /// println!("RangeFrom"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(5..) + /// .eq(squares[5..].par_iter().copied())); + /// assert_eq!(&vec[..], &squares[..5]); + /// assert!(vec.capacity() >= squares.len()); + /// + /// println!("RangeTo"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(..5) + /// .eq(squares[..5].par_iter().copied())); + /// assert_eq!(&vec[..], &squares[5..]); + /// assert!(vec.capacity() >= squares.len()); + /// + /// println!("RangeToInclusive"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(..=5) + /// .eq(squares[..=5].par_iter().copied())); + /// assert_eq!(&vec[..], &squares[6..]); + /// assert!(vec.capacity() >= squares.len()); + /// + /// println!("Range"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(3..7) + /// .eq(squares[3..7].par_iter().copied())); + /// assert_eq!(&vec[..3], &squares[..3]); + /// assert_eq!(&vec[3..], &squares[7..]); + /// assert!(vec.capacity() >= squares.len()); + /// + /// println!("RangeInclusive"); + /// let mut vec = squares.clone(); + /// assert!(vec.par_drain(3..=7) + /// .eq(squares[3..=7].par_iter().copied())); + /// assert_eq!(&vec[..3], &squares[..3]); + /// assert_eq!(&vec[3..], &squares[8..]); + /// assert!(vec.capacity() 
>= squares.len()); + /// ``` + fn par_drain>(self, range: R) -> Self::Iter; +} + +/// We hide the `Try` trait in a private module, as it's only meant to be a +/// stable clone of the standard library's `Try` trait, as yet unstable. +mod private { + /// Clone of `std::ops::Try`. + /// + /// Implementing this trait is not permitted outside of `rayon`. + pub trait Try { + private_decl! {} + + type Ok; + type Error; + fn into_result(self) -> Result; + fn from_ok(v: Self::Ok) -> Self; + fn from_error(v: Self::Error) -> Self; + } + + impl Try for Option { + private_impl! {} + + type Ok = T; + type Error = (); + + fn into_result(self) -> Result { + self.ok_or(()) + } + fn from_ok(v: T) -> Self { + Some(v) + } + fn from_error(_: ()) -> Self { + None + } + } + + impl Try for Result { + private_impl! {} + + type Ok = T; + type Error = E; + + fn into_result(self) -> Result { + self + } + fn from_ok(v: T) -> Self { + Ok(v) + } + fn from_error(v: E) -> Self { + Err(v) + } + } +} diff --git a/src/iter/multizip.rs b/src/iter/multizip.rs new file mode 100644 index 0000000..8e36d08 --- /dev/null +++ b/src/iter/multizip.rs @@ -0,0 +1,338 @@ +use super::plumbing::*; +use super::*; + +/// `MultiZip` is an iterator that zips up a tuple of parallel iterators to +/// produce tuples of their items. +/// +/// It is created by calling `into_par_iter()` on a tuple of types that +/// implement `IntoParallelIterator`, or `par_iter()`/`par_iter_mut()` with +/// types that are iterable by reference. +/// +/// The implementation currently supports tuples up to length 12. +/// +/// # Examples +/// +/// ``` +/// use rayon::prelude::*; +/// +/// // This will iterate `r` by mutable reference, like `par_iter_mut()`, while +/// // ranges are all iterated by value like `into_par_iter()`. +/// // Note that the zipped iterator is only as long as the shortest input. 
+/// let mut r = vec![0; 3]; +/// (&mut r, 1..10, 10..100, 100..1000).into_par_iter() +/// .for_each(|(r, x, y, z)| *r = x * y + z); +/// +/// assert_eq!(&r, &[1 * 10 + 100, 2 * 11 + 101, 3 * 12 + 102]); +/// ``` +/// +/// For a group that should all be iterated by reference, you can use a tuple reference. +/// +/// ``` +/// use rayon::prelude::*; +/// +/// let xs: Vec<_> = (1..10).collect(); +/// let ys: Vec<_> = (10..100).collect(); +/// let zs: Vec<_> = (100..1000).collect(); +/// +/// // Reference each input separately with `IntoParallelIterator`: +/// let r1: Vec<_> = (&xs, &ys, &zs).into_par_iter() +/// .map(|(x, y, z)| x * y + z) +/// .collect(); +/// +/// // Reference them all together with `IntoParallelRefIterator`: +/// let r2: Vec<_> = (xs, ys, zs).par_iter() +/// .map(|(x, y, z)| x * y + z) +/// .collect(); +/// +/// assert_eq!(r1, r2); +/// ``` +/// +/// Mutable references to a tuple will work similarly. +/// +/// ``` +/// use rayon::prelude::*; +/// +/// let mut xs: Vec<_> = (1..4).collect(); +/// let mut ys: Vec<_> = (-4..-1).collect(); +/// let mut zs = vec![0; 3]; +/// +/// // Mutably reference each input separately with `IntoParallelIterator`: +/// (&mut xs, &mut ys, &mut zs).into_par_iter().for_each(|(x, y, z)| { +/// *z += *x + *y; +/// std::mem::swap(x, y); +/// }); +/// +/// assert_eq!(xs, (vec![-4, -3, -2])); +/// assert_eq!(ys, (vec![1, 2, 3])); +/// assert_eq!(zs, (vec![-3, -1, 1])); +/// +/// // Mutably reference them all together with `IntoParallelRefMutIterator`: +/// let mut tuple = (xs, ys, zs); +/// tuple.par_iter_mut().for_each(|(x, y, z)| { +/// *z += *x + *y; +/// std::mem::swap(x, y); +/// }); +/// +/// assert_eq!(tuple, (vec![1, 2, 3], vec![-4, -3, -2], vec![-6, -2, 2])); +/// ``` +#[derive(Debug, Clone)] +pub struct MultiZip { + tuple: T, +} + +// These macros greedily consume 4 or 2 items first to achieve log2 nesting depth. +// For example, 5 => 4,1 => (2,2),1. 
+// +// The tuples go up to 12, so we might want to greedily consume 8 too, but +// the depth works out the same if we let that expand on the right: +// 9 => 4,5 => (2,2),(4,1) => (2,2),((2,2),1) +// 12 => 4,8 => (2,2),(4,4) => (2,2),((2,2),(2,2)) +// +// But if we ever increase to 13, we would want to split 8,5 rather than 4,9. + +macro_rules! reduce { + ($a:expr, $b:expr, $c:expr, $d:expr, $( $x:expr ),+ => $fn:path) => { + reduce!(reduce!($a, $b, $c, $d => $fn), + reduce!($( $x ),+ => $fn) + => $fn) + }; + ($a:expr, $b:expr, $( $x:expr ),+ => $fn:path) => { + reduce!(reduce!($a, $b => $fn), + reduce!($( $x ),+ => $fn) + => $fn) + }; + ($a:expr, $b:expr => $fn:path) => { $fn($a, $b) }; + ($a:expr => $fn:path) => { $a }; +} + +macro_rules! nest { + ($A:tt, $B:tt, $C:tt, $D:tt, $( $X:tt ),+) => { + (nest!($A, $B, $C, $D), nest!($( $X ),+)) + }; + ($A:tt, $B:tt, $( $X:tt ),+) => { + (($A, $B), nest!($( $X ),+)) + }; + ($A:tt, $B:tt) => { ($A, $B) }; + ($A:tt) => { $A }; +} + +macro_rules! flatten { + ($( $T:ident ),+) => {{ + #[allow(non_snake_case)] + fn flatten<$( $T ),+>(nest!($( $T ),+) : nest!($( $T ),+)) -> ($( $T, )+) { + ($( $T, )+) + } + flatten + }}; +} + +macro_rules! 
multizip_impls { + ($( + $Tuple:ident { + $(($idx:tt) -> $T:ident)+ + } + )+) => { + $( + impl<$( $T, )+> IntoParallelIterator for ($( $T, )+) + where + $( + $T: IntoParallelIterator, + $T::Iter: IndexedParallelIterator, + )+ + { + type Item = ($( $T::Item, )+); + type Iter = MultiZip<($( $T::Iter, )+)>; + + fn into_par_iter(self) -> Self::Iter { + MultiZip { + tuple: ( $( self.$idx.into_par_iter(), )+ ), + } + } + } + + impl<'a, $( $T, )+> IntoParallelIterator for &'a ($( $T, )+) + where + $( + $T: IntoParallelRefIterator<'a>, + $T::Iter: IndexedParallelIterator, + )+ + { + type Item = ($( $T::Item, )+); + type Iter = MultiZip<($( $T::Iter, )+)>; + + fn into_par_iter(self) -> Self::Iter { + MultiZip { + tuple: ( $( self.$idx.par_iter(), )+ ), + } + } + } + + impl<'a, $( $T, )+> IntoParallelIterator for &'a mut ($( $T, )+) + where + $( + $T: IntoParallelRefMutIterator<'a>, + $T::Iter: IndexedParallelIterator, + )+ + { + type Item = ($( $T::Item, )+); + type Iter = MultiZip<($( $T::Iter, )+)>; + + fn into_par_iter(self) -> Self::Iter { + MultiZip { + tuple: ( $( self.$idx.par_iter_mut(), )+ ), + } + } + } + + impl<$( $T, )+> ParallelIterator for MultiZip<($( $T, )+)> + where + $( $T: IndexedParallelIterator, )+ + { + type Item = ($( $T::Item, )+); + + fn drive_unindexed(self, consumer: CONSUMER) -> CONSUMER::Result + where + CONSUMER: UnindexedConsumer, + { + self.drive(consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } + } + + impl<$( $T, )+> IndexedParallelIterator for MultiZip<($( $T, )+)> + where + $( $T: IndexedParallelIterator, )+ + { + fn drive(self, consumer: CONSUMER) -> CONSUMER::Result + where + CONSUMER: Consumer, + { + reduce!($( self.tuple.$idx ),+ => IndexedParallelIterator::zip) + .map(flatten!($( $T ),+)) + .drive(consumer) + } + + fn len(&self) -> usize { + reduce!($( self.tuple.$idx.len() ),+ => Ord::min) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + reduce!($( self.tuple.$idx 
),+ => IndexedParallelIterator::zip) + .map(flatten!($( $T ),+)) + .with_producer(callback) + } + } + )+ + } +} + +multizip_impls! { + Tuple1 { + (0) -> A + } + Tuple2 { + (0) -> A + (1) -> B + } + Tuple3 { + (0) -> A + (1) -> B + (2) -> C + } + Tuple4 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + } + Tuple5 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + } + Tuple6 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + } + Tuple7 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + } + Tuple8 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + (7) -> H + } + Tuple9 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + (7) -> H + (8) -> I + } + Tuple10 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + (7) -> H + (8) -> I + (9) -> J + } + Tuple11 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + (7) -> H + (8) -> I + (9) -> J + (10) -> K + } + Tuple12 { + (0) -> A + (1) -> B + (2) -> C + (3) -> D + (4) -> E + (5) -> F + (6) -> G + (7) -> H + (8) -> I + (9) -> J + (10) -> K + (11) -> L + } +} diff --git a/src/iter/noop.rs b/src/iter/noop.rs new file mode 100644 index 0000000..1e55ecb --- /dev/null +++ b/src/iter/noop.rs @@ -0,0 +1,59 @@ +use super::plumbing::*; + +pub(super) struct NoopConsumer; + +impl Consumer for NoopConsumer { + type Folder = NoopConsumer; + type Reducer = NoopReducer; + type Result = (); + + fn split_at(self, _index: usize) -> (Self, Self, NoopReducer) { + (NoopConsumer, NoopConsumer, NoopReducer) + } + + fn into_folder(self) -> Self { + self + } + + fn full(&self) -> bool { + false + } +} + +impl Folder for NoopConsumer { + type Result = (); + + fn consume(self, _item: T) -> Self { + self + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + iter.into_iter().for_each(drop); + self + } + + fn complete(self) {} + + fn full(&self) -> bool { + false + 
} +} + +impl UnindexedConsumer for NoopConsumer { + fn split_off_left(&self) -> Self { + NoopConsumer + } + + fn to_reducer(&self) -> NoopReducer { + NoopReducer + } +} + +pub(super) struct NoopReducer; + +impl Reducer<()> for NoopReducer { + fn reduce(self, _left: (), _right: ()) {} +} diff --git a/src/iter/once.rs b/src/iter/once.rs new file mode 100644 index 0000000..5140b6b --- /dev/null +++ b/src/iter/once.rs @@ -0,0 +1,68 @@ +use crate::iter::plumbing::*; +use crate::iter::*; + +/// Creates a parallel iterator that produces an element exactly once. +/// +/// This admits no parallelism on its own, but it could be chained to existing +/// parallel iterators to extend their contents, or otherwise used for any code +/// that deals with generic parallel iterators. +/// +/// # Examples +/// +/// ``` +/// use rayon::prelude::*; +/// use rayon::iter::once; +/// +/// let pi = (0..1234).into_par_iter() +/// .chain(once(-1)) +/// .chain(1234..10_000); +/// +/// assert_eq!(pi.clone().count(), 10_001); +/// assert_eq!(pi.clone().filter(|&x| x < 0).count(), 1); +/// assert_eq!(pi.position_any(|x| x < 0), Some(1234)); +/// ``` +pub fn once(item: T) -> Once { + Once { item } +} + +/// Iterator adaptor for [the `once()` function](fn.once.html). +#[derive(Clone, Debug)] +pub struct Once { + item: T, +} + +impl ParallelIterator for Once { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.drive(consumer) + } + + fn opt_len(&self) -> Option { + Some(1) + } +} + +impl IndexedParallelIterator for Once { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + consumer.into_folder().consume(self.item).complete() + } + + fn len(&self) -> usize { + 1 + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + // Let `OptionProducer` handle it. 
+ Some(self.item).into_par_iter().with_producer(callback) + } +} diff --git a/src/iter/panic_fuse.rs b/src/iter/panic_fuse.rs new file mode 100644 index 0000000..7487230 --- /dev/null +++ b/src/iter/panic_fuse.rs @@ -0,0 +1,342 @@ +use super::plumbing::*; +use super::*; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::thread; + +/// `PanicFuse` is an adaptor that wraps an iterator with a fuse in case +/// of panics, to halt all threads as soon as possible. +/// +/// This struct is created by the [`panic_fuse()`] method on [`ParallelIterator`] +/// +/// [`panic_fuse()`]: trait.ParallelIterator.html#method.panic_fuse +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct PanicFuse { + base: I, +} + +/// Helper that sets a bool to `true` if dropped while unwinding. +#[derive(Clone)] +struct Fuse<'a>(&'a AtomicBool); + +impl<'a> Drop for Fuse<'a> { + #[inline] + fn drop(&mut self) { + if thread::panicking() { + self.0.store(true, Ordering::Relaxed); + } + } +} + +impl<'a> Fuse<'a> { + #[inline] + fn panicked(&self) -> bool { + self.0.load(Ordering::Relaxed) + } +} + +impl PanicFuse +where + I: ParallelIterator, +{ + /// Creates a new `PanicFuse` iterator. 
+ pub(super) fn new(base: I) -> PanicFuse { + PanicFuse { base } + } +} + +impl ParallelIterator for PanicFuse +where + I: ParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let panicked = AtomicBool::new(false); + let consumer1 = PanicFuseConsumer { + base: consumer, + fuse: Fuse(&panicked), + }; + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for PanicFuse +where + I: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let panicked = AtomicBool::new(false); + let consumer1 = PanicFuseConsumer { + base: consumer, + fuse: Fuse(&panicked), + }; + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { callback }); + + struct Callback { + callback: CB, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let panicked = AtomicBool::new(false); + let producer = PanicFuseProducer { + base, + fuse: Fuse(&panicked), + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Producer implementation + +struct PanicFuseProducer<'a, P> { + base: P, + fuse: Fuse<'a>, +} + +impl<'a, P> Producer for PanicFuseProducer<'a, P> +where + P: Producer, +{ + type Item = P::Item; + type IntoIter = PanicFuseIter<'a, P::IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + PanicFuseIter { + base: self.base.into_iter(), + fuse: self.fuse, + } + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + PanicFuseProducer { + base: left, + fuse: self.fuse.clone(), + }, + PanicFuseProducer { + base: right, + fuse: self.fuse, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = PanicFuseFolder { + base: folder, + fuse: self.fuse, + }; + self.base.fold_with(folder1).base + } +} + +struct PanicFuseIter<'a, I> { + base: I, + fuse: Fuse<'a>, +} + +impl<'a, I> Iterator for PanicFuseIter<'a, I> +where + I: Iterator, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + if self.fuse.panicked() { + None + } else { + self.base.next() + } + } + + fn size_hint(&self) -> (usize, Option) { + self.base.size_hint() + } +} + +impl<'a, I> DoubleEndedIterator for PanicFuseIter<'a, I> +where + I: DoubleEndedIterator, +{ + fn next_back(&mut self) -> Option { + if self.fuse.panicked() { + None + } else { + self.base.next_back() + } + } +} + +impl<'a, I> ExactSizeIterator for PanicFuseIter<'a, I> +where + I: ExactSizeIterator, +{ + fn len(&self) -> usize { + self.base.len() + } +} + +/// //////////////////////////////////////////////////////////////////////// 
+/// Consumer implementation + +struct PanicFuseConsumer<'a, C> { + base: C, + fuse: Fuse<'a>, +} + +impl<'a, T, C> Consumer for PanicFuseConsumer<'a, C> +where + C: Consumer, +{ + type Folder = PanicFuseFolder<'a, C::Folder>; + type Reducer = PanicFuseReducer<'a, C::Reducer>; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + PanicFuseConsumer { + base: left, + fuse: self.fuse.clone(), + }, + PanicFuseConsumer { + base: right, + fuse: self.fuse.clone(), + }, + PanicFuseReducer { + base: reducer, + _fuse: self.fuse, + }, + ) + } + + fn into_folder(self) -> Self::Folder { + PanicFuseFolder { + base: self.base.into_folder(), + fuse: self.fuse, + } + } + + fn full(&self) -> bool { + self.fuse.panicked() || self.base.full() + } +} + +impl<'a, T, C> UnindexedConsumer for PanicFuseConsumer<'a, C> +where + C: UnindexedConsumer, +{ + fn split_off_left(&self) -> Self { + PanicFuseConsumer { + base: self.base.split_off_left(), + fuse: self.fuse.clone(), + } + } + + fn to_reducer(&self) -> Self::Reducer { + PanicFuseReducer { + base: self.base.to_reducer(), + _fuse: self.fuse.clone(), + } + } +} + +struct PanicFuseFolder<'a, C> { + base: C, + fuse: Fuse<'a>, +} + +impl<'a, T, C> Folder for PanicFuseFolder<'a, C> +where + C: Folder, +{ + type Result = C::Result; + + fn consume(mut self, item: T) -> Self { + self.base = self.base.consume(item); + self + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + fn cool<'a, T>(fuse: &'a Fuse<'_>) -> impl Fn(&T) -> bool + 'a { + move |_| !fuse.panicked() + } + + self.base = { + let fuse = &self.fuse; + let iter = iter.into_iter().take_while(cool(fuse)); + self.base.consume_iter(iter) + }; + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.fuse.panicked() || self.base.full() + } +} + +struct PanicFuseReducer<'a, C> { + base: C, + _fuse: 
Fuse<'a>, +} + +impl<'a, T, C> Reducer for PanicFuseReducer<'a, C> +where + C: Reducer, +{ + fn reduce(self, left: T, right: T) -> T { + self.base.reduce(left, right) + } +} diff --git a/src/iter/par_bridge.rs b/src/iter/par_bridge.rs new file mode 100644 index 0000000..4c2b96e --- /dev/null +++ b/src/iter/par_bridge.rs @@ -0,0 +1,201 @@ +use crossbeam_deque::{Steal, Stealer, Worker}; + +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Mutex, TryLockError}; +use std::thread::yield_now; + +use crate::current_num_threads; +use crate::iter::plumbing::{bridge_unindexed, Folder, UnindexedConsumer, UnindexedProducer}; +use crate::iter::ParallelIterator; + +/// Conversion trait to convert an `Iterator` to a `ParallelIterator`. +/// +/// This creates a "bridge" from a sequential iterator to a parallel one, by distributing its items +/// across the Rayon thread pool. This has the advantage of being able to parallelize just about +/// anything, but the resulting `ParallelIterator` can be less efficient than if you started with +/// `par_iter` instead. However, it can still be useful for iterators that are difficult to +/// parallelize by other means, like channels or file or network I/O. +/// +/// The resulting iterator is not guaranteed to keep the order of the original iterator. +/// +/// # Examples +/// +/// To use this trait, take an existing `Iterator` and call `par_bridge` on it. 
After that, you can +/// use any of the `ParallelIterator` methods: +/// +/// ``` +/// use rayon::iter::ParallelBridge; +/// use rayon::prelude::ParallelIterator; +/// use std::sync::mpsc::channel; +/// +/// let rx = { +/// let (tx, rx) = channel(); +/// +/// tx.send("one!"); +/// tx.send("two!"); +/// tx.send("three!"); +/// +/// rx +/// }; +/// +/// let mut output: Vec<&'static str> = rx.into_iter().par_bridge().collect(); +/// output.sort_unstable(); +/// +/// assert_eq!(&*output, &["one!", "three!", "two!"]); +/// ``` +pub trait ParallelBridge: Sized { + /// Creates a bridge from this type to a `ParallelIterator`. + fn par_bridge(self) -> IterBridge; +} + +impl ParallelBridge for T +where + T::Item: Send, +{ + fn par_bridge(self) -> IterBridge { + IterBridge { iter: self } + } +} + +/// `IterBridge` is a parallel iterator that wraps a sequential iterator. +/// +/// This type is created when using the `par_bridge` method on `ParallelBridge`. See the +/// [`ParallelBridge`] documentation for details. 
+/// +/// [`ParallelBridge`]: trait.ParallelBridge.html +#[derive(Debug, Clone)] +pub struct IterBridge { + iter: Iter, +} + +impl ParallelIterator for IterBridge +where + Iter::Item: Send, +{ + type Item = Iter::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let split_count = AtomicUsize::new(current_num_threads()); + let worker = Worker::new_fifo(); + let stealer = worker.stealer(); + let done = AtomicBool::new(false); + let iter = Mutex::new((self.iter, worker)); + + bridge_unindexed( + IterParallelProducer { + split_count: &split_count, + done: &done, + iter: &iter, + items: stealer, + }, + consumer, + ) + } +} + +struct IterParallelProducer<'a, Iter: Iterator> { + split_count: &'a AtomicUsize, + done: &'a AtomicBool, + iter: &'a Mutex<(Iter, Worker)>, + items: Stealer, +} + +// manual clone because T doesn't need to be Clone, but the derive assumes it should be +impl<'a, Iter: Iterator + 'a> Clone for IterParallelProducer<'a, Iter> { + fn clone(&self) -> Self { + IterParallelProducer { + split_count: self.split_count, + done: self.done, + iter: self.iter, + items: self.items.clone(), + } + } +} + +impl<'a, Iter: Iterator + Send + 'a> UnindexedProducer for IterParallelProducer<'a, Iter> +where + Iter::Item: Send, +{ + type Item = Iter::Item; + + fn split(self) -> (Self, Option) { + let mut count = self.split_count.load(Ordering::SeqCst); + + loop { + let done = self.done.load(Ordering::SeqCst); + match count.checked_sub(1) { + Some(new_count) if !done => { + let last_count = + self.split_count + .compare_and_swap(count, new_count, Ordering::SeqCst); + if last_count == count { + return (self.clone(), Some(self)); + } else { + count = last_count; + } + } + _ => { + return (self, None); + } + } + } + } + + fn fold_with(self, mut folder: F) -> F + where + F: Folder, + { + loop { + match self.items.steal() { + Steal::Success(it) => { + folder = folder.consume(it); + if folder.full() { + return folder; + } + } + 
Steal::Empty => { + if self.done.load(Ordering::SeqCst) { + // the iterator is out of items, no use in continuing + return folder; + } else { + // our cache is out of items, time to load more from the iterator + match self.iter.try_lock() { + Ok(mut guard) => { + let count = current_num_threads(); + let count = (count * count) * 2; + + let (ref mut iter, ref worker) = *guard; + + // while worker.len() < count { + // FIXME the new deque doesn't let us count items. We can just + // push a number of items, but that doesn't consider active + // stealers elsewhere. + for _ in 0..count { + if let Some(it) = iter.next() { + worker.push(it); + } else { + self.done.store(true, Ordering::SeqCst); + break; + } + } + } + Err(TryLockError::WouldBlock) => { + // someone else has the mutex, just sit tight until it's ready + yield_now(); //TODO: use a thread=pool-aware yield? (#548) + } + Err(TryLockError::Poisoned(_)) => { + // any panics from other threads will have been caught by the pool, + // and will be re-thrown when joined - just exit + return folder; + } + } + } + } + Steal::Retry => (), + } + } + } +} diff --git a/src/iter/plumbing/README.md b/src/iter/plumbing/README.md new file mode 100644 index 0000000..cd94eae --- /dev/null +++ b/src/iter/plumbing/README.md @@ -0,0 +1,315 @@ +# Parallel Iterators + +These are some notes on the design of the parallel iterator traits. +This file does not describe how to **use** parallel iterators. + +## The challenge + +Parallel iterators are more complicated than sequential iterators. +The reason is that they have to be able to split themselves up and +operate in parallel across the two halves. + +The current design for parallel iterators has two distinct modes in +which they can be used; as we will see, not all iterators support both +modes (which is why there are two): + +- **Pull mode** (the `Producer` and `UnindexedProducer` traits): in this mode, + the iterator is asked to produce the next item using a call to `next`. 
This + is basically like a normal iterator, but with a twist: you can split the + iterator in half to produce disjoint items in separate threads. + - in the `Producer` trait, splitting is done with `split_at`, which accepts + an index where the split should be performed. Only indexed iterators can + work in this mode, as they know exactly how much data they will produce, + and how to locate the requested index. + - in the `UnindexedProducer` trait, splitting is done with `split`, which + simply requests that the producer divide itself *approximately* in half. + This is useful when the exact length and/or layout is unknown, as with + `String` characters, or when the length might exceed `usize`, as with + `Range<u64>` on 32-bit platforms. + - In theory, any `Producer` could act unindexed, but we don't currently + use that possibility. When you know the exact length, a `split` can + simply be implemented as `split_at(length/2)`. +- **Push mode** (the `Consumer` and `UnindexedConsumer` traits): in + this mode, the iterator instead is *given* each item in turn, which + is then processed. This is the opposite of a normal iterator. It's + more like a `for_each` call: each time a new item is produced, the + `consume` method is called with that item. (The traits themselves are + a bit more complex, as they support state that can be threaded + through and ultimately reduced.) Like producers, there are two + variants of consumers. The difference is how the split is performed: + - in the `Consumer` trait, splitting is done with `split_at`, which + accepts an index where the split should be performed. All + iterators can work in this mode. The resulting halves thus have an + idea about how much data they expect to consume. + - in the `UnindexedConsumer` trait, splitting is done with + `split_off_left`. There is no index: the resulting halves must be + prepared to process any amount of data, and they don't know where that + data falls in the overall stream.
+ - Not all consumers can operate in this mode. It works for + `for_each` and `reduce`, for example, but it does not work for + `collect_into_vec`, since in that case the position of each item is + important for knowing where it ends up in the target collection. + +## How iterator execution proceeds + +We'll walk through this example iterator chain to start. This chain +demonstrates more-or-less the full complexity of what can happen. + +```rust +vec1.par_iter() + .zip(vec2.par_iter()) + .flat_map(some_function) + .for_each(some_other_function) +``` + +To handle an iterator chain, we start by creating consumers. This +works from the end. So in this case, the call to `for_each` is the +final step, so it will create a `ForEachConsumer` that, given an item, +just calls `some_other_function` with that item. (`ForEachConsumer` is +a very simple consumer because it doesn't need to thread any state +between items at all.) + +Now, the `for_each` call will pass this consumer to the base iterator, +which is the `flat_map`. It will do this by calling the `drive_unindexed` +method on the `ParallelIterator` trait. `drive_unindexed` basically +says "produce items for this iterator and feed them to this consumer"; +it only works for unindexed consumers. + +(As an aside, it is interesting that only some consumers can work in +unindexed mode, but all producers can *drive* an unindexed consumer. +In contrast, only some producers can drive an *indexed* consumer, but +all consumers can be supplied indexes. Isn't variance neat.) + +As it happens, `FlatMap` only works with unindexed consumers anyway. +This is because flat-map basically has no idea how many items it will +produce. If you ask flat-map to produce the 22nd item, it can't do it, +at least not without some intermediate state. 
It doesn't know whether +processing the first item will create 1 item, 3 items, or 100; +therefore, to produce an arbitrary item, it would basically just have +to start at the beginning and execute sequentially, which is not what +we want. But for unindexed consumers, this doesn't matter, since they +don't need to know how much data they will get. + +Therefore, `FlatMap` can wrap the `ForEachConsumer` with a +`FlatMapConsumer` that feeds to it. This `FlatMapConsumer` will be +given one item. It will then invoke `some_function` to get a parallel +iterator out. It will then ask this new parallel iterator to drive the +`ForEachConsumer`. The `drive_unindexed` method on `flat_map` can then +pass the `FlatMapConsumer` up the chain to the previous item, which is +`zip`. At this point, something interesting happens. + +## Switching from push to pull mode + +If you think about `zip`, it can't really be implemented as a +consumer, at least not without an intermediate thread and some +channels or something (or maybe coroutines). The problem is that it +has to walk two iterators *in lockstep*. Basically, it can't call two +`drive` methods simultaneously, it can only call one at a time. So at +this point, the `zip` iterator needs to switch from *push mode* into +*pull mode*. + +You'll note that `Zip` is only usable if its inputs implement +`IndexedParallelIterator`, meaning that they can produce data starting +at random points in the stream. This need to switch to pull mode is +exactly why. If we want to split a zip iterator at position 22, we +need to be able to start zipping items from index 22 right away, +without having to start from index 0. + +Anyway, so at this point, the `drive_unindexed` method for `Zip` stops +creating consumers. Instead, it creates a *producer*, a `ZipProducer`, +to be exact, and calls the `bridge` function in the `plumbing` +module. Creating a `ZipProducer` will in turn create producers for +the two iterators being zipped.
This is possible because they both +implement `IndexedParallelIterator`. + +The `bridge` function will then connect the consumer, which is +handling the `flat_map` and `for_each`, with the producer, which is +handling the `zip` and its predecessors. It will split down until the +chunks seem reasonably small, then pull items from the producer and +feed them to the consumer. + +## The base case + +The other time that `bridge` gets used is when we bottom out in an +indexed producer, such as a slice or range. There is also a +`bridge_unindexed` equivalent for - you guessed it - unindexed producers, +such as string characters. + + + +## What on earth is `ProducerCallback`? + +We saw that when you call a parallel action method like +`par_iter.reduce()`, that will create a "reducing" consumer and then +invoke `par_iter.drive_unindexed()` (or `par_iter.drive()`) as +appropriate. This may create yet more consumers as we proceed up the +parallel iterator chain. But at some point we're going to get to the +start of the chain, or to a parallel iterator (like `zip()`) that has +to coordinate multiple inputs. At that point, we need to start +converting parallel iterators into producers. + +The way we do this is by invoking the method `with_producer()`, defined on +`IndexedParallelIterator`. This is a callback scheme. In an ideal world, +it would work like this: + +```rust +base_iter.with_producer(|base_producer| { + // here, `base_producer` is the producer for `base_iter` +}); +``` + +In that case, we could implement a combinator like `map()` by getting +the producer for the base iterator, wrapping it to make our own +`MapProducer`, and then passing that to the callback.
Something like +this: + +```rust +struct MapProducer<'f, P, F: 'f> { + base: P, + map_op: &'f F, +} + +impl IndexedParallelIterator for Map + where I: IndexedParallelIterator, + F: MapOp, +{ + fn with_producer(self, callback: CB) -> CB::Output { + let map_op = &self.map_op; + self.base_iter.with_producer(|base_producer| { + // Here `producer` is the producer for `self.base_iter`. + // Wrap that to make a `MapProducer` + let map_producer = MapProducer { + base: base_producer, + map_op: map_op + }; + + // invoke the callback with the wrapped version + callback(map_producer) + }); + } +}); +``` + +This example demonstrates some of the power of the callback scheme. +It winds up being a very flexible setup. For one thing, it means we +can take ownership of `par_iter`; we can then in turn give ownership +away of its bits and pieces into the producer (this is very useful if +the iterator owns an `&mut` slice, for example), or create shared +references and put *those* in the producer. In the case of map, for +example, the parallel iterator owns the `map_op`, and we borrow +references to it which we then put into the `MapProducer` (this means +the `MapProducer` can easily split itself and share those references). +The `with_producer` method can also create resources that are needed +during the parallel execution, since the producer does not have to be +returned. + +Unfortunately there is a catch. We can't actually use closures the way +I showed you. To see why, think about the type that `map_producer` +would have to have. If we were going to write the `with_producer` +method using a closure, it would have to look something like this: + +```rust +pub trait IndexedParallelIterator: ParallelIterator { + type Producer; + fn with_producer(self, callback: CB) -> R + where CB: FnOnce(Self::Producer) -> R; + ... +} +``` + +Note that we had to add this associated type `Producer` so that +we could specify the argument of the callback to be `Self::Producer`. 
+Now, imagine trying to write that `MapProducer` impl using this style: + +```rust +impl IndexedParallelIterator for Map + where I: IndexedParallelIterator, + F: MapOp, +{ + type MapProducer = MapProducer<'f, P::Producer, F>; + // ^^ wait, what is this `'f`? + + fn with_producer(self, callback: CB) -> R + where CB: FnOnce(Self::Producer) -> R + { + let map_op = &self.map_op; + // ^^^^^^ `'f` is (conceptually) the lifetime of this reference, + // so it will be different for each call to `with_producer`! + } +} +``` + +This may look familiar to you: it's the same problem that we have +trying to define an `Iterable` trait. Basically, the producer type +needs to include a lifetime (here, `'f`) that refers to the body of +`with_producer` and hence is not in scope at the impl level. + +If we had [associated type constructors][1598], we could solve this +problem that way. But there is another solution. We can use a +dedicated callback trait like `ProducerCallback`, instead of `FnOnce`: + +[1598]: https://github.com/rust-lang/rfcs/pull/1598 + +```rust +pub trait ProducerCallback { + type Output; + fn callback

(self, producer: P) -> Self::Output + where P: Producer; +} +``` + +Using this trait, the signature of `with_producer()` looks like this: + +```rust +fn with_producer<CB: ProducerCallback<Self::Item>>(self, callback: CB) -> CB::Output; +``` + +Notice that this signature **never has to name the producer type** -- +there is no associated type `Producer` anymore. This is because the +`callback()` method is generic over **all** producers `P`. + +The problem is that now the `||` sugar doesn't work anymore. So we +have to manually create the callback struct, which is a mite tedious. +So our `MapProducer` code looks like this: + +```rust +impl IndexedParallelIterator for Map + where I: IndexedParallelIterator, + F: MapOp, +{ + fn with_producer(self, callback: CB) -> CB::Output + where CB: ProducerCallback + { + return self.base.with_producer(Callback { callback: callback, map_op: self.map_op }); + // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // Manual version of the closure sugar: create an instance + // of a struct that implements `ProducerCallback`. + + // The struct declaration. Each field is something that we need to capture from the + // creating scope. + struct Callback { + callback: CB, + map_op: F, + } + + // Implement the `ProducerCallback` trait. This is pure boilerplate. + impl ProducerCallback for Callback + where F: MapOp, + CB: ProducerCallback + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where P: Producer + { + // The body of the closure is here: + let producer = MapProducer { base: base, + map_op: &self.map_op }; + self.callback.callback(producer) + } + } + } +} +``` + +OK, a bit tedious, but it works! diff --git a/src/iter/plumbing/mod.rs b/src/iter/plumbing/mod.rs new file mode 100644 index 0000000..71d4fb4 --- /dev/null +++ b/src/iter/plumbing/mod.rs @@ -0,0 +1,484 @@ +//! Traits and functions used to implement parallel iteration. These are +//! low-level details -- users of parallel iterators should not need to +//! interact with them directly. See [the `plumbing` README][r] for a general overview. +//! +//! [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md + +use crate::join_context; + +use super::IndexedParallelIterator; + +use std::cmp; +use std::usize; + +/// The `ProducerCallback` trait is a kind of generic closure, +/// [analogous to `FnOnce`][FnOnce]. See [the corresponding section in +/// the plumbing README][r] for more details. +/// +/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md#producer-callback +/// [FnOnce]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html +pub trait ProducerCallback { + /// The type of value returned by this callback. Analogous to + /// [`Output` from the `FnOnce` trait][Output]. + /// + /// [Output]: https://doc.rust-lang.org/std/ops/trait.FnOnce.html#associatedtype.Output + type Output; + + /// Invokes the callback with the given producer as argument. The + /// key point of this trait is that this method is generic over + /// `P`, and hence implementors must be defined for any producer. + fn callback

(self, producer: P) -> Self::Output + where + P: Producer; +} + +/// A `Producer` is effectively a "splittable `IntoIterator`". That +/// is, a producer is a value which can be converted into an iterator +/// at any time: at that point, it simply produces items on demand, +/// like any iterator. But what makes a `Producer` special is that, +/// *before* we convert to an iterator, we can also **split** it at a +/// particular point using the `split_at` method. This will yield up +/// two producers, one producing the items before that point, and one +/// producing the items after that point (these two producers can then +/// independently be split further, or be converted into iterators). +/// In Rayon, this splitting is used to divide between threads. +/// See [the `plumbing` README][r] for further details. +/// +/// Note that each producer will always produce a fixed number of +/// items N. However, this number N is not queryable through the API; +/// the consumer is expected to track it. +/// +/// NB. You might expect `Producer` to extend the `IntoIterator` +/// trait. However, [rust-lang/rust#20671][20671] prevents us from +/// declaring the DoubleEndedIterator and ExactSizeIterator +/// constraints on a required IntoIterator trait, so we inline +/// IntoIterator here until that issue is fixed. +/// +/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md +/// [20671]: https://github.com/rust-lang/rust/issues/20671 +pub trait Producer: Send + Sized { + /// The type of item that will be produced by this producer once + /// it is converted into an iterator. + type Item; + + /// The type of iterator we will become. + type IntoIter: Iterator + DoubleEndedIterator + ExactSizeIterator; + + /// Convert `self` into an iterator; at this point, no more parallel splits + /// are possible. + fn into_iter(self) -> Self::IntoIter; + + /// The minimum number of items that we will process + /// sequentially. 
Defaults to 1, which means that we will split + /// all the way down to a single item. This can be raised higher + /// using the [`with_min_len`] method, which will force us to + /// create sequential tasks at a larger granularity. Note that + /// Rayon automatically normally attempts to adjust the size of + /// parallel splits to reduce overhead, so this should not be + /// needed. + /// + /// [`with_min_len`]: ../trait.IndexedParallelIterator.html#method.with_min_len + fn min_len(&self) -> usize { + 1 + } + + /// The maximum number of items that we will process + /// sequentially. Defaults to MAX, which means that we can choose + /// not to split at all. This can be lowered using the + /// [`with_max_len`] method, which will force us to create more + /// parallel tasks. Note that Rayon automatically normally + /// attempts to adjust the size of parallel splits to reduce + /// overhead, so this should not be needed. + /// + /// [`with_max_len`]: ../trait.IndexedParallelIterator.html#method.with_max_len + fn max_len(&self) -> usize { + usize::MAX + } + + /// Split into two producers; one produces items `0..index`, the + /// other `index..N`. Index must be less than or equal to `N`. + fn split_at(self, index: usize) -> (Self, Self); + + /// Iterate the producer, feeding each element to `folder`, and + /// stop when the folder is full (or all elements have been consumed). + /// + /// The provided implementation is sufficient for most iterables. + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.into_iter()) + } +} + +/// A consumer is effectively a [generalized "fold" operation][fold], +/// and in fact each consumer will eventually be converted into a +/// [`Folder`]. What makes a consumer special is that, like a +/// [`Producer`], it can be **split** into multiple consumers using +/// the `split_at` method. When a consumer is split, it produces two +/// consumers, as well as a **reducer**. 
The two consumers can be fed +/// items independently, and when they are done the reducer is used to +/// combine their two results into one. See [the `plumbing` +/// README][r] for further details. +/// +/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md +/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold +/// [`Folder`]: trait.Folder.html +/// [`Producer`]: trait.Producer.html +pub trait Consumer: Send + Sized { + /// The type of folder that this consumer can be converted into. + type Folder: Folder; + + /// The type of reducer that is produced if this consumer is split. + type Reducer: Reducer; + + /// The type of result that this consumer will ultimately produce. + type Result: Send; + + /// Divide the consumer into two consumers, one processing items + /// `0..index` and one processing items from `index..`. Also + /// produces a reducer that can be used to reduce the results at + /// the end. + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer); + + /// Convert the consumer into a folder that can consume items + /// sequentially, eventually producing a final result. + fn into_folder(self) -> Self::Folder; + + /// Hint whether this `Consumer` would like to stop processing + /// further items, e.g. if a search has been completed. + fn full(&self) -> bool; +} + +/// The `Folder` trait encapsulates [the standard fold +/// operation][fold]. It can be fed many items using the `consume` +/// method. At the end, once all items have been consumed, it can then +/// be converted (using `complete`) into a final value. +/// +/// [fold]: https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.fold +pub trait Folder: Sized { + /// The type of result that will ultimately be produced by the folder. + type Result; + + /// Consume next item and return new sequential state. + fn consume(self, item: Item) -> Self; + + /// Consume items from the iterator until full, and return new sequential state. 
+ /// + /// This method is **optional**. The default simply iterates over + /// `iter`, invoking `consume` and checking after each iteration + /// whether `full` returns false. + /// + /// The main reason to override it is if you can provide a more + /// specialized, efficient implementation. + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + for item in iter { + self = self.consume(item); + if self.full() { + break; + } + } + self + } + + /// Finish consuming items, produce final result. + fn complete(self) -> Self::Result; + + /// Hint whether this `Folder` would like to stop processing + /// further items, e.g. if a search has been completed. + fn full(&self) -> bool; +} + +/// The reducer is the final step of a `Consumer` -- after a consumer +/// has been split into two parts, and each of those parts has been +/// fully processed, we are left with two results. The reducer is then +/// used to combine those two results into one. See [the `plumbing` +/// README][r] for further details. +/// +/// [r]: https://github.com/rayon-rs/rayon/blob/master/src/iter/plumbing/README.md +pub trait Reducer { + /// Reduce two final results into one; this is executed after a + /// split. + fn reduce(self, left: Result, right: Result) -> Result; +} + +/// A stateless consumer can be freely copied. These consumers can be +/// used like regular consumers, but they also support a +/// `split_off_left` method that does not take an index to split, but +/// simply splits at some arbitrary point (`for_each`, for example, +/// produces an unindexed consumer). +pub trait UnindexedConsumer: Consumer { + /// Splits off a "left" consumer and returns it. The `self` + /// consumer should then be used to consume the "right" portion of + /// the data. (The ordering matters for methods like find_first -- + /// values produced by the returned value are given precedence + /// over values produced by `self`.) 
Once the left and right + /// halves have been fully consumed, you should reduce the results + /// with the result of `to_reducer`. + fn split_off_left(&self) -> Self; + + /// Creates a reducer that can be used to combine the results from + /// a split consumer. + fn to_reducer(&self) -> Self::Reducer; +} + +/// A variant on `Producer` which does not know its exact length or +/// cannot represent it in a `usize`. These producers act like +/// ordinary producers except that they cannot be told to split at a +/// particular point. Instead, you just ask them to split 'somewhere'. +/// +/// (In principle, `Producer` could extend this trait; however, it +/// does not because to do so would require producers to carry their +/// own length with them.) +pub trait UnindexedProducer: Send + Sized { + /// The type of item returned by this producer. + type Item; + + /// Split midway into a new producer if possible, otherwise return `None`. + fn split(self) -> (Self, Option); + + /// Iterate the producer, feeding each element to `folder`, and + /// stop when the folder is full (or all elements have been consumed). + fn fold_with(self, folder: F) -> F + where + F: Folder; +} + +/// A splitter controls the policy for splitting into smaller work items. +/// +/// Thief-splitting is an adaptive policy that starts by splitting into +/// enough jobs for every worker thread, and then resets itself whenever a +/// job is actually stolen into a different thread. +#[derive(Clone, Copy)] +struct Splitter { + /// The `splits` tell us approximately how many remaining times we'd + /// like to split this job. We always just divide it by two though, so + /// the effective number of pieces will be `next_power_of_two()`. + splits: usize, +} + +impl Splitter { + #[inline] + fn new() -> Splitter { + Splitter { + splits: crate::current_num_threads(), + } + } + + #[inline] + fn try_split(&mut self, stolen: bool) -> bool { + let Splitter { splits } = *self; + + if stolen { + // This job was stolen! 
Reset the number of desired splits to the + // thread count, if that's more than we had remaining anyway. + self.splits = cmp::max(crate::current_num_threads(), self.splits / 2); + true + } else if splits > 0 { + // We have splits remaining, make it so. + self.splits /= 2; + true + } else { + // Not stolen, and no more splits -- we're done! + false + } + } +} + +/// The length splitter is built on thief-splitting, but additionally takes +/// into account the remaining length of the iterator. +#[derive(Clone, Copy)] +struct LengthSplitter { + inner: Splitter, + + /// The smallest we're willing to divide into. Usually this is just 1, + /// but you can choose a larger working size with `with_min_len()`. + min: usize, +} + +impl LengthSplitter { + /// Creates a new splitter based on lengths. + /// + /// The `min` is a hard lower bound. We'll never split below that, but + /// of course an iterator might start out smaller already. + /// + /// The `max` is an upper bound on the working size, used to determine + /// the minimum number of times we need to split to get under that limit. + /// The adaptive algorithm may very well split even further, but never + /// smaller than the `min`. + #[inline] + fn new(min: usize, max: usize, len: usize) -> LengthSplitter { + let mut splitter = LengthSplitter { + inner: Splitter::new(), + min: cmp::max(min, 1), + }; + + // Divide the given length by the max working length to get the minimum + // number of splits we need to get under that max. This rounds down, + // but the splitter actually gives `next_power_of_two()` pieces anyway. + // e.g. len 12345 / max 100 = 123 min_splits -> 128 pieces. + let min_splits = len / cmp::max(max, 1); + + // Only update the value if it's not splitting enough already. 
+ if min_splits > splitter.inner.splits { + splitter.inner.splits = min_splits; + } + + splitter + } + + #[inline] + fn try_split(&mut self, len: usize, stolen: bool) -> bool { + // If splitting wouldn't make us too small, try the inner splitter. + len / 2 >= self.min && self.inner.try_split(stolen) + } +} + +/// This helper function is used to "connect" a parallel iterator to a +/// consumer. It will convert the `par_iter` into a producer P and +/// then pull items from P and feed them to `consumer`, splitting and +/// creating parallel threads as needed. +/// +/// This is useful when you are implementing your own parallel +/// iterators: it is often used as the definition of the +/// [`drive_unindexed`] or [`drive`] methods. +/// +/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed +/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive +pub fn bridge(par_iter: I, consumer: C) -> C::Result +where + I: IndexedParallelIterator, + C: Consumer, +{ + let len = par_iter.len(); + return par_iter.with_producer(Callback { len, consumer }); + + struct Callback { + len: usize, + consumer: C, + } + + impl ProducerCallback for Callback + where + C: Consumer, + { + type Output = C::Result; + fn callback

(self, producer: P) -> C::Result + where + P: Producer, + { + bridge_producer_consumer(self.len, producer, self.consumer) + } + } +} + +/// This helper function is used to "connect" a producer and a +/// consumer. You may prefer to call [`bridge`], which wraps this +/// function. This function will draw items from `producer` and feed +/// them to `consumer`, splitting and creating parallel tasks when +/// needed. +/// +/// This is useful when you are implementing your own parallel +/// iterators: it is often used as the definition of the +/// [`drive_unindexed`] or [`drive`] methods. +/// +/// [`bridge`]: fn.bridge.html +/// [`drive_unindexed`]: ../trait.ParallelIterator.html#tymethod.drive_unindexed +/// [`drive`]: ../trait.IndexedParallelIterator.html#tymethod.drive +pub fn bridge_producer_consumer(len: usize, producer: P, consumer: C) -> C::Result +where + P: Producer, + C: Consumer, +{ + let splitter = LengthSplitter::new(producer.min_len(), producer.max_len(), len); + return helper(len, false, splitter, producer, consumer); + + fn helper( + len: usize, + migrated: bool, + mut splitter: LengthSplitter, + producer: P, + consumer: C, + ) -> C::Result + where + P: Producer, + C: Consumer, + { + if consumer.full() { + consumer.into_folder().complete() + } else if splitter.try_split(len, migrated) { + let mid = len / 2; + let (left_producer, right_producer) = producer.split_at(mid); + let (left_consumer, right_consumer, reducer) = consumer.split_at(mid); + let (left_result, right_result) = join_context( + |context| { + helper( + mid, + context.migrated(), + splitter, + left_producer, + left_consumer, + ) + }, + |context| { + helper( + len - mid, + context.migrated(), + splitter, + right_producer, + right_consumer, + ) + }, + ); + reducer.reduce(left_result, right_result) + } else { + producer.fold_with(consumer.into_folder()).complete() + } + } +} + +/// A variant of [`bridge_producer_consumer`] where the producer is an unindexed producer. 
+/// +/// [`bridge_producer_consumer`]: fn.bridge_producer_consumer.html +pub fn bridge_unindexed(producer: P, consumer: C) -> C::Result +where + P: UnindexedProducer, + C: UnindexedConsumer, +{ + let splitter = Splitter::new(); + bridge_unindexed_producer_consumer(false, splitter, producer, consumer) +} + +fn bridge_unindexed_producer_consumer( + migrated: bool, + mut splitter: Splitter, + producer: P, + consumer: C, +) -> C::Result +where + P: UnindexedProducer, + C: UnindexedConsumer, +{ + if consumer.full() { + consumer.into_folder().complete() + } else if splitter.try_split(migrated) { + match producer.split() { + (left_producer, Some(right_producer)) => { + let (reducer, left_consumer, right_consumer) = + (consumer.to_reducer(), consumer.split_off_left(), consumer); + let bridge = bridge_unindexed_producer_consumer; + let (left_result, right_result) = join_context( + |context| bridge(context.migrated(), splitter, left_producer, left_consumer), + |context| bridge(context.migrated(), splitter, right_producer, right_consumer), + ); + reducer.reduce(left_result, right_result) + } + (producer, None) => producer.fold_with(consumer.into_folder()).complete(), + } + } else { + producer.fold_with(consumer.into_folder()).complete() + } +} diff --git a/src/iter/positions.rs b/src/iter/positions.rs new file mode 100644 index 0000000..f584bb2 --- /dev/null +++ b/src/iter/positions.rs @@ -0,0 +1,137 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `Positions` takes a predicate `predicate` and filters out elements that match, +/// yielding their indices. 
+/// +/// This struct is created by the [`positions()`] method on [`IndexedParallelIterator`] +/// +/// [`positions()`]: trait.IndexedParallelIterator.html#method.positions +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Positions { + base: I, + predicate: P, +} + +impl Debug for Positions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Positions") + .field("base", &self.base) + .finish() + } +} + +impl Positions +where + I: IndexedParallelIterator, +{ + /// Create a new `Positions` iterator. + pub(super) fn new(base: I, predicate: P) -> Self { + Positions { base, predicate } + } +} + +impl ParallelIterator for Positions +where + I: IndexedParallelIterator, + P: Fn(I::Item) -> bool + Sync + Send, +{ + type Item = usize; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = PositionsConsumer::new(consumer, &self.predicate, 0); + self.base.drive(consumer1) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct PositionsConsumer<'p, C, P> { + base: C, + predicate: &'p P, + offset: usize, +} + +impl<'p, C, P> PositionsConsumer<'p, C, P> { + fn new(base: C, predicate: &'p P, offset: usize) -> Self { + PositionsConsumer { + base, + predicate, + offset, + } + } +} + +impl<'p, T, C, P> Consumer for PositionsConsumer<'p, C, P> +where + C: Consumer, + P: Fn(T) -> bool + Sync, +{ + type Folder = PositionsFolder<'p, C::Folder, P>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, C::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + PositionsConsumer::new(left, self.predicate, self.offset), + PositionsConsumer::new(right, self.predicate, self.offset + index), + reducer, + ) + } + + fn into_folder(self) -> 
Self::Folder { + PositionsFolder { + base: self.base.into_folder(), + predicate: self.predicate, + offset: self.offset, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +struct PositionsFolder<'p, F, P> { + base: F, + predicate: &'p P, + offset: usize, +} + +impl Folder for PositionsFolder<'_, F, P> +where + F: Folder, + P: Fn(T) -> bool, +{ + type Result = F::Result; + + fn consume(mut self, item: T) -> Self { + let index = self.offset; + self.offset += 1; + if (self.predicate)(item) { + self.base = self.base.consume(index); + } + self + } + + // This cannot easily specialize `consume_iter` to be better than + // the default, because that requires checking `self.base.full()` + // during a call to `self.base.consume_iter()`. (#632) + + fn complete(self) -> Self::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/product.rs b/src/iter/product.rs new file mode 100644 index 0000000..a3d0727 --- /dev/null +++ b/src/iter/product.rs @@ -0,0 +1,114 @@ +use super::plumbing::*; +use super::ParallelIterator; + +use std::iter::{self, Product}; +use std::marker::PhantomData; + +pub(super) fn product(pi: PI) -> P +where + PI: ParallelIterator, + P: Send + Product + Product, +{ + pi.drive_unindexed(ProductConsumer::new()) +} + +fn mul(left: T, right: T) -> T { + iter::once(left).chain(iter::once(right)).product() +} + +struct ProductConsumer { + _marker: PhantomData<*const P>, +} + +unsafe impl Send for ProductConsumer

{} + +impl ProductConsumer

{ + fn new() -> ProductConsumer

{ + ProductConsumer { + _marker: PhantomData, + } + } +} + +impl Consumer for ProductConsumer

+where + P: Send + Product + Product, +{ + type Folder = ProductFolder

; + type Reducer = Self; + type Result = P; + + fn split_at(self, _index: usize) -> (Self, Self, Self) { + ( + ProductConsumer::new(), + ProductConsumer::new(), + ProductConsumer::new(), + ) + } + + fn into_folder(self) -> Self::Folder { + ProductFolder { + product: iter::empty::().product(), + } + } + + fn full(&self) -> bool { + false + } +} + +impl UnindexedConsumer for ProductConsumer

+where + P: Send + Product + Product, +{ + fn split_off_left(&self) -> Self { + ProductConsumer::new() + } + + fn to_reducer(&self) -> Self::Reducer { + ProductConsumer::new() + } +} + +impl

Reducer

for ProductConsumer

+where + P: Send + Product, +{ + fn reduce(self, left: P, right: P) -> P { + mul(left, right) + } +} + +struct ProductFolder

{ + product: P, +} + +impl Folder for ProductFolder

+where + P: Product + Product, +{ + type Result = P; + + fn consume(self, item: T) -> Self { + ProductFolder { + product: mul(self.product, iter::once(item).product()), + } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + ProductFolder { + product: mul(self.product, iter.into_iter().product()), + } + } + + fn complete(self) -> P { + self.product + } + + fn full(&self) -> bool { + false + } +} diff --git a/src/iter/reduce.rs b/src/iter/reduce.rs new file mode 100644 index 0000000..321b5dd --- /dev/null +++ b/src/iter/reduce.rs @@ -0,0 +1,116 @@ +use super::plumbing::*; +use super::ParallelIterator; + +pub(super) fn reduce(pi: PI, identity: ID, reduce_op: R) -> T +where + PI: ParallelIterator, + R: Fn(T, T) -> T + Sync, + ID: Fn() -> T + Sync, + T: Send, +{ + let consumer = ReduceConsumer { + identity: &identity, + reduce_op: &reduce_op, + }; + pi.drive_unindexed(consumer) +} + +struct ReduceConsumer<'r, R, ID> { + identity: &'r ID, + reduce_op: &'r R, +} + +impl<'r, R, ID> Copy for ReduceConsumer<'r, R, ID> {} + +impl<'r, R, ID> Clone for ReduceConsumer<'r, R, ID> { + fn clone(&self) -> Self { + *self + } +} + +impl<'r, R, ID, T> Consumer for ReduceConsumer<'r, R, ID> +where + R: Fn(T, T) -> T + Sync, + ID: Fn() -> T + Sync, + T: Send, +{ + type Folder = ReduceFolder<'r, R, T>; + type Reducer = Self; + type Result = T; + + fn split_at(self, _index: usize) -> (Self, Self, Self) { + (self, self, self) + } + + fn into_folder(self) -> Self::Folder { + ReduceFolder { + reduce_op: self.reduce_op, + item: (self.identity)(), + } + } + + fn full(&self) -> bool { + false + } +} + +impl<'r, R, ID, T> UnindexedConsumer for ReduceConsumer<'r, R, ID> +where + R: Fn(T, T) -> T + Sync, + ID: Fn() -> T + Sync, + T: Send, +{ + fn split_off_left(&self) -> Self { + *self + } + + fn to_reducer(&self) -> Self::Reducer { + *self + } +} + +impl<'r, R, ID, T> Reducer for ReduceConsumer<'r, R, ID> +where + R: Fn(T, T) -> T + Sync, +{ + fn reduce(self, left: T, 
right: T) -> T { + (self.reduce_op)(left, right) + } +} + +struct ReduceFolder<'r, R, T> { + reduce_op: &'r R, + item: T, +} + +impl<'r, R, T> Folder for ReduceFolder<'r, R, T> +where + R: Fn(T, T) -> T, +{ + type Result = T; + + fn consume(self, item: T) -> Self { + ReduceFolder { + reduce_op: self.reduce_op, + item: (self.reduce_op)(self.item, item), + } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + ReduceFolder { + reduce_op: self.reduce_op, + item: iter.into_iter().fold(self.item, self.reduce_op), + } + } + + fn complete(self) -> T { + self.item + } + + fn full(&self) -> bool { + false + } +} diff --git a/src/iter/repeat.rs b/src/iter/repeat.rs new file mode 100644 index 0000000..f84a6fe --- /dev/null +++ b/src/iter/repeat.rs @@ -0,0 +1,241 @@ +use super::plumbing::*; +use super::*; +use std::iter; +use std::usize; + +/// Iterator adaptor for [the `repeat()` function](fn.repeat.html). +#[derive(Debug, Clone)] +pub struct Repeat { + element: T, +} + +/// Creates a parallel iterator that endlessly repeats `elt` (by +/// cloning it). Note that this iterator has "infinite" length, so +/// typically you would want to use `zip` or `take` or some other +/// means to shorten it, or consider using +/// [the `repeatn()` function](fn.repeatn.html) instead. +/// +/// # Examples +/// +/// ``` +/// use rayon::prelude::*; +/// use rayon::iter::repeat; +/// let x: Vec<(i32, i32)> = repeat(22).zip(0..3).collect(); +/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]); +/// ``` +pub fn repeat(elt: T) -> Repeat { + Repeat { element: elt } +} + +impl Repeat +where + T: Clone + Send, +{ + /// Takes only `n` repeats of the element, similar to the general + /// [`take()`](trait.IndexedParallelIterator.html#method.take). + /// + /// The resulting `RepeatN` is an `IndexedParallelIterator`, allowing + /// more functionality than `Repeat` alone. 
+ pub fn take(self, n: usize) -> RepeatN { + repeatn(self.element, n) + } + + /// Iterates tuples, repeating the element with items from another + /// iterator, similar to the general + /// [`zip()`](trait.IndexedParallelIterator.html#method.zip). + pub fn zip(self, zip_op: Z) -> Zip, Z::Iter> + where + Z: IntoParallelIterator, + Z::Iter: IndexedParallelIterator, + { + let z = zip_op.into_par_iter(); + let n = z.len(); + self.take(n).zip(z) + } +} + +impl ParallelIterator for Repeat +where + T: Clone + Send, +{ + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = RepeatProducer { + element: self.element, + }; + bridge_unindexed(producer, consumer) + } +} + +/// Unindexed producer for `Repeat`. +struct RepeatProducer { + element: T, +} + +impl UnindexedProducer for RepeatProducer { + type Item = T; + + fn split(self) -> (Self, Option) { + ( + RepeatProducer { + element: self.element.clone(), + }, + Some(RepeatProducer { + element: self.element, + }), + ) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(iter::repeat(self.element)) + } +} + +/// Iterator adaptor for [the `repeatn()` function](fn.repeatn.html). +#[derive(Debug, Clone)] +pub struct RepeatN { + element: T, + count: usize, +} + +/// Creates a parallel iterator that produces `n` repeats of `elt` +/// (by cloning it). 
+/// +/// # Examples +/// +/// ``` +/// use rayon::prelude::*; +/// use rayon::iter::repeatn; +/// let x: Vec<(i32, i32)> = repeatn(22, 3).zip(0..3).collect(); +/// assert_eq!(x, vec![(22, 0), (22, 1), (22, 2)]); +/// ``` +pub fn repeatn(elt: T, n: usize) -> RepeatN { + RepeatN { + element: elt, + count: n, + } +} + +impl ParallelIterator for RepeatN +where + T: Clone + Send, +{ + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.count) + } +} + +impl IndexedParallelIterator for RepeatN +where + T: Clone + Send, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(RepeatNProducer { + element: self.element, + count: self.count, + }) + } + + fn len(&self) -> usize { + self.count + } +} + +/// Producer for `RepeatN`. +struct RepeatNProducer { + element: T, + count: usize, +} + +impl Producer for RepeatNProducer { + type Item = T; + type IntoIter = Iter; + + fn into_iter(self) -> Self::IntoIter { + Iter { + element: self.element, + count: self.count, + } + } + + fn split_at(self, index: usize) -> (Self, Self) { + ( + RepeatNProducer { + element: self.element.clone(), + count: index, + }, + RepeatNProducer { + element: self.element, + count: self.count - index, + }, + ) + } +} + +/// Iterator for `RepeatN`. +/// +/// This is conceptually like `std::iter::Take>`, but +/// we need `DoubleEndedIterator` and unconditional `ExactSizeIterator`. 
+struct Iter { + element: T, + count: usize, +} + +impl Iterator for Iter { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + if self.count > 0 { + self.count -= 1; + Some(self.element.clone()) + } else { + None + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.count, Some(self.count)) + } +} + +impl DoubleEndedIterator for Iter { + #[inline] + fn next_back(&mut self) -> Option { + self.next() + } +} + +impl ExactSizeIterator for Iter { + #[inline] + fn len(&self) -> usize { + self.count + } +} diff --git a/src/iter/rev.rs b/src/iter/rev.rs new file mode 100644 index 0000000..a4c3b7c --- /dev/null +++ b/src/iter/rev.rs @@ -0,0 +1,123 @@ +use super::plumbing::*; +use super::*; +use std::iter; + +/// `Rev` is an iterator that produces elements in reverse order. This struct +/// is created by the [`rev()`] method on [`IndexedParallelIterator`] +/// +/// [`rev()`]: trait.IndexedParallelIterator.html#method.rev +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Rev { + base: I, +} + +impl Rev +where + I: IndexedParallelIterator, +{ + /// Creates a new `Rev` iterator. 
+ pub(super) fn new(base: I) -> Self { + Rev { base } + } +} + +impl ParallelIterator for Rev +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Rev +where + I: IndexedParallelIterator, +{ + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let len = self.base.len(); + return self.base.with_producer(Callback { callback, len }); + + struct Callback { + callback: CB, + len: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = RevProducer { + base, + len: self.len, + }; + self.callback.callback(producer) + } + } + } +} + +struct RevProducer

{ + base: P, + len: usize, +} + +impl

Producer for RevProducer

+where + P: Producer, +{ + type Item = P::Item; + type IntoIter = iter::Rev; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().rev() + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(self.len - index); + ( + RevProducer { + base: right, + len: index, + }, + RevProducer { + base: left, + len: self.len - index, + }, + ) + } +} diff --git a/src/iter/skip.rs b/src/iter/skip.rs new file mode 100644 index 0000000..9983f16 --- /dev/null +++ b/src/iter/skip.rs @@ -0,0 +1,88 @@ +use super::noop::NoopConsumer; +use super::plumbing::*; +use super::*; +use std::cmp::min; + +/// `Skip` is an iterator that skips over the first `n` elements. +/// This struct is created by the [`skip()`] method on [`IndexedParallelIterator`] +/// +/// [`skip()`]: trait.IndexedParallelIterator.html#method.skip +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Skip { + base: I, + n: usize, +} + +impl Skip +where + I: IndexedParallelIterator, +{ + /// Creates a new `Skip` iterator. 
+ pub(super) fn new(base: I, n: usize) -> Self { + let n = min(base.len(), n); + Skip { base, n } + } +} + +impl ParallelIterator for Skip +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Skip +where + I: IndexedParallelIterator, +{ + fn len(&self) -> usize { + self.base.len() - self.n + } + + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + n: self.n, + }); + + struct Callback { + callback: CB, + n: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let (before_skip, after_skip) = base.split_at(self.n); + bridge_producer_consumer(self.n, before_skip, NoopConsumer); + self.callback.callback(after_skip) + } + } + } +} diff --git a/src/iter/splitter.rs b/src/iter/splitter.rs new file mode 100644 index 0000000..40935ac --- /dev/null +++ b/src/iter/splitter.rs @@ -0,0 +1,174 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// The `split` function takes arbitrary data and a closure that knows how to +/// split it, and turns this into a `ParallelIterator`. +/// +/// # Examples +/// +/// As a simple example, Rayon can recursively split ranges of indices +/// +/// ``` +/// use rayon::iter; +/// use rayon::prelude::*; +/// use std::ops::Range; +/// +/// +/// // We define a range of indices as follows +/// type Range1D = Range; +/// +/// // Splitting it in two can be done like this +/// fn split_range1(r: Range1D) -> (Range1D, Option) { +/// // We are mathematically unable to split the range if there is only +/// // one point inside of it, but we could stop splitting before that. 
+/// if r.end - r.start <= 1 { return (r, None); } +/// +/// // Here, our range is considered large enough to be splittable +/// let midpoint = r.start + (r.end - r.start) / 2; +/// (r.start..midpoint, Some(midpoint..r.end)) +/// } +/// +/// // By using iter::split, Rayon will split the range until it has enough work +/// // to feed the CPU cores, then give us the resulting sub-ranges +/// iter::split(0..4096, split_range1).for_each(|sub_range| { +/// // As our initial range had a power-of-two size, the final sub-ranges +/// // should have power-of-two sizes too +/// assert!((sub_range.end - sub_range.start).is_power_of_two()); +/// }); +/// ``` +/// +/// This recursive splitting can be extended to two or three dimensions, +/// to reproduce a classic "block-wise" parallelization scheme of graphics and +/// numerical simulations: +/// +/// ``` +/// # use rayon::iter; +/// # use rayon::prelude::*; +/// # use std::ops::Range; +/// # type Range1D = Range; +/// # fn split_range1(r: Range1D) -> (Range1D, Option) { +/// # if r.end - r.start <= 1 { return (r, None); } +/// # let midpoint = r.start + (r.end - r.start) / 2; +/// # (r.start..midpoint, Some(midpoint..r.end)) +/// # } +/// # +/// // A two-dimensional range of indices can be built out of two 1D ones +/// struct Range2D { +/// // Range of horizontal indices +/// pub rx: Range1D, +/// +/// // Range of vertical indices +/// pub ry: Range1D, +/// } +/// +/// // We want to recursively split them by the largest dimension until we have +/// // enough sub-ranges to feed our mighty multi-core CPU. This function +/// // carries out one such split. 
+/// fn split_range2(r2: Range2D) -> (Range2D, Option) { +/// // Decide on which axis (horizontal/vertical) the range should be split +/// let width = r2.rx.end - r2.rx.start; +/// let height = r2.ry.end - r2.ry.start; +/// if width >= height { +/// // This is a wide range, split it on the horizontal axis +/// let (split_rx, ry) = (split_range1(r2.rx), r2.ry); +/// let out1 = Range2D { +/// rx: split_rx.0, +/// ry: ry.clone(), +/// }; +/// let out2 = split_rx.1.map(|rx| Range2D { rx, ry }); +/// (out1, out2) +/// } else { +/// // This is a tall range, split it on the vertical axis +/// let (rx, split_ry) = (r2.rx, split_range1(r2.ry)); +/// let out1 = Range2D { +/// rx: rx.clone(), +/// ry: split_ry.0, +/// }; +/// let out2 = split_ry.1.map(|ry| Range2D { rx, ry, }); +/// (out1, out2) +/// } +/// } +/// +/// // Again, rayon can handle the recursive splitting for us +/// let range = Range2D { rx: 0..800, ry: 0..600 }; +/// iter::split(range, split_range2).for_each(|sub_range| { +/// // If the sub-ranges were indeed split by the largest dimension, then +/// // if no dimension was twice larger than the other initially, this +/// // property will remain true in the final sub-ranges. +/// let width = sub_range.rx.end - sub_range.rx.start; +/// let height = sub_range.ry.end - sub_range.ry.start; +/// assert!((width / 2 <= height) && (height / 2 <= width)); +/// }); +/// ``` +/// +pub fn split(data: D, splitter: S) -> Split +where + D: Send, + S: Fn(D) -> (D, Option) + Sync, +{ + Split { data, splitter } +} + +/// `Split` is a parallel iterator using arbitrary data and a splitting function. +/// This struct is created by the [`split()`] function. 
+/// +/// [`split()`]: fn.split.html +#[derive(Clone)] +pub struct Split { + data: D, + splitter: S, +} + +impl Debug for Split { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Split").field("data", &self.data).finish() + } +} + +impl ParallelIterator for Split +where + D: Send, + S: Fn(D) -> (D, Option) + Sync + Send, +{ + type Item = D; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitProducer { + data: self.data, + splitter: &self.splitter, + }; + bridge_unindexed(producer, consumer) + } +} + +struct SplitProducer<'a, D, S> { + data: D, + splitter: &'a S, +} + +impl<'a, D, S> UnindexedProducer for SplitProducer<'a, D, S> +where + D: Send, + S: Fn(D) -> (D, Option) + Sync, +{ + type Item = D; + + fn split(mut self) -> (Self, Option) { + let splitter = self.splitter; + let (left, right) = splitter(self.data); + self.data = left; + (self, right.map(|data| SplitProducer { data, splitter })) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume(self.data) + } +} diff --git a/src/iter/step_by.rs b/src/iter/step_by.rs new file mode 100644 index 0000000..2002f42 --- /dev/null +++ b/src/iter/step_by.rs @@ -0,0 +1,144 @@ +#![cfg(step_by)] +use std::cmp::min; + +use super::plumbing::*; +use super::*; +use crate::math::div_round_up; +use std::iter; +use std::usize; + +/// `StepBy` is an iterator that skips `n` elements between each yield, where `n` is the given step. +/// This struct is created by the [`step_by()`] method on [`IndexedParallelIterator`] +/// +/// [`step_by()`]: trait.IndexedParallelIterator.html#method.step_by +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct StepBy { + base: I, + step: usize, +} + +impl StepBy +where + I: IndexedParallelIterator, +{ + /// Creates a new `StepBy` iterator. 
+ pub(super) fn new(base: I, step: usize) -> Self { + StepBy { base, step } + } +} + +impl ParallelIterator for StepBy +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for StepBy +where + I: IndexedParallelIterator, +{ + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn len(&self) -> usize { + div_round_up(self.base.len(), self.step) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + let len = self.base.len(); + return self.base.with_producer(Callback { + callback, + step: self.step, + len, + }); + + struct Callback { + callback: CB, + step: usize, + len: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = StepByProducer { + base, + step: self.step, + len: self.len, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Producer implementation + +struct StepByProducer

{ + base: P, + step: usize, + len: usize, +} + +impl

Producer for StepByProducer

+where + P: Producer, +{ + type Item = P::Item; + type IntoIter = iter::StepBy; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().step_by(self.step) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = min(index * self.step, self.len); + + let (left, right) = self.base.split_at(elem_index); + ( + StepByProducer { + base: left, + step: self.step, + len: elem_index, + }, + StepByProducer { + base: right, + step: self.step, + len: self.len - elem_index, + }, + ) + } + + fn min_len(&self) -> usize { + div_round_up(self.base.min_len(), self.step) + } + + fn max_len(&self) -> usize { + self.base.max_len() / self.step + } +} diff --git a/src/iter/sum.rs b/src/iter/sum.rs new file mode 100644 index 0000000..a73e0bf --- /dev/null +++ b/src/iter/sum.rs @@ -0,0 +1,110 @@ +use super::plumbing::*; +use super::ParallelIterator; + +use std::iter::{self, Sum}; +use std::marker::PhantomData; + +pub(super) fn sum(pi: PI) -> S +where + PI: ParallelIterator, + S: Send + Sum + Sum, +{ + pi.drive_unindexed(SumConsumer::new()) +} + +fn add(left: T, right: T) -> T { + iter::once(left).chain(iter::once(right)).sum() +} + +struct SumConsumer { + _marker: PhantomData<*const S>, +} + +unsafe impl Send for SumConsumer {} + +impl SumConsumer { + fn new() -> SumConsumer { + SumConsumer { + _marker: PhantomData, + } + } +} + +impl Consumer for SumConsumer +where + S: Send + Sum + Sum, +{ + type Folder = SumFolder; + type Reducer = Self; + type Result = S; + + fn split_at(self, _index: usize) -> (Self, Self, Self) { + (SumConsumer::new(), SumConsumer::new(), SumConsumer::new()) + } + + fn into_folder(self) -> Self::Folder { + SumFolder { + sum: iter::empty::().sum(), + } + } + + fn full(&self) -> bool { + false + } +} + +impl UnindexedConsumer for SumConsumer +where + S: Send + Sum + Sum, +{ + fn split_off_left(&self) -> Self { + SumConsumer::new() + } + + fn to_reducer(&self) -> Self::Reducer { + SumConsumer::new() + } +} + +impl Reducer for SumConsumer 
+where + S: Send + Sum, +{ + fn reduce(self, left: S, right: S) -> S { + add(left, right) + } +} + +struct SumFolder { + sum: S, +} + +impl Folder for SumFolder +where + S: Sum + Sum, +{ + type Result = S; + + fn consume(self, item: T) -> Self { + SumFolder { + sum: add(self.sum, iter::once(item).sum()), + } + } + + fn consume_iter(self, iter: I) -> Self + where + I: IntoIterator, + { + SumFolder { + sum: add(self.sum, iter.into_iter().sum()), + } + } + + fn complete(self) -> S { + self.sum + } + + fn full(&self) -> bool { + false + } +} diff --git a/src/iter/take.rs b/src/iter/take.rs new file mode 100644 index 0000000..52d15d8 --- /dev/null +++ b/src/iter/take.rs @@ -0,0 +1,86 @@ +use super::plumbing::*; +use super::*; +use std::cmp::min; + +/// `Take` is an iterator that iterates over the first `n` elements. +/// This struct is created by the [`take()`] method on [`IndexedParallelIterator`] +/// +/// [`take()`]: trait.IndexedParallelIterator.html#method.take +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Take { + base: I, + n: usize, +} + +impl Take +where + I: IndexedParallelIterator, +{ + /// Creates a new `Take` iterator. 
+ pub(super) fn new(base: I, n: usize) -> Self { + let n = min(base.len(), n); + Take { base, n } + } +} + +impl ParallelIterator for Take +where + I: IndexedParallelIterator, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Take +where + I: IndexedParallelIterator, +{ + fn len(&self) -> usize { + self.n + } + + fn drive>(self, consumer: C) -> C::Result { + bridge(self, consumer) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + n: self.n, + }); + + struct Callback { + callback: CB, + n: usize, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + { + type Output = CB::Output; + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let (producer, _) = base.split_at(self.n); + self.callback.callback(producer) + } + } + } +} diff --git a/src/iter/test.rs b/src/iter/test.rs new file mode 100644 index 0000000..bc5106b --- /dev/null +++ b/src/iter/test.rs @@ -0,0 +1,2188 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use super::*; +use crate::prelude::*; +use rayon_core::*; + +use rand::distributions::Standard; +use rand::{Rng, SeedableRng}; +use rand_xorshift::XorShiftRng; +use std::collections::LinkedList; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::collections::{BinaryHeap, VecDeque}; +use std::f64; +use std::fmt::Debug; +use std::sync::mpsc; +use std::usize; + +fn is_indexed(_: T) {} + +fn seeded_rng() -> XorShiftRng { + let mut seed = ::Seed::default(); + (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i); + XorShiftRng::from_seed(seed) +} + +#[test] +fn execute() { + let a: Vec = (0..1024).collect(); + let mut b = vec![]; + a.par_iter().map(|&i| i + 1).collect_into_vec(&mut b); + let c: Vec = (0..1024).map(|i| i + 1).collect(); + assert_eq!(b, c); +} + +#[test] +fn execute_cloned() { + let a: Vec = (0..1024).collect(); + let mut b: Vec = vec![]; + a.par_iter().cloned().collect_into_vec(&mut b); + let c: Vec = (0..1024).collect(); + assert_eq!(b, c); +} + +#[test] +fn execute_range() { + let a = 0i32..1024; + let mut b = vec![]; + a.into_par_iter().map(|i| i + 1).collect_into_vec(&mut b); + let c: Vec = (0..1024).map(|i| i + 1).collect(); + assert_eq!(b, c); +} + +#[test] +fn execute_unindexed_range() { + let a = 0i64..1024; + let b: LinkedList = a.into_par_iter().map(|i| i + 1).collect(); + let c: LinkedList = (0..1024).map(|i| i + 1).collect(); + assert_eq!(b, c); +} + +#[test] +fn execute_pseudo_indexed_range() { + use std::i128::MAX; + let range = MAX - 1024..MAX; + + // Given `Some` length, collecting `Vec` will try to act indexed. 
+ let a = range.clone().into_par_iter(); + assert_eq!(a.opt_len(), Some(1024)); + + let b: Vec = a.map(|i| i + 1).collect(); + let c: Vec = range.map(|i| i + 1).collect(); + assert_eq!(b, c); +} + +#[test] +fn check_map_indexed() { + let a = [1, 2, 3]; + is_indexed(a.par_iter().map(|x| x)); +} + +#[test] +fn map_sum() { + let a: Vec = (0..1024).collect(); + let r1: i32 = a.par_iter().map(|&i| i + 1).sum(); + let r2 = a.iter().map(|&i| i + 1).sum(); + assert_eq!(r1, r2); +} + +#[test] +fn map_reduce() { + let a: Vec = (0..1024).collect(); + let r1 = a.par_iter().map(|&i| i + 1).reduce(|| 0, |i, j| i + j); + let r2 = a.iter().map(|&i| i + 1).sum(); + assert_eq!(r1, r2); +} + +#[test] +fn map_reduce_with() { + let a: Vec = (0..1024).collect(); + let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j); + let r2 = a.iter().map(|&i| i + 1).sum(); + assert_eq!(r1, Some(r2)); +} + +#[test] +fn fold_map_reduce() { + // Kind of a weird test, but it demonstrates various + // transformations that are taking place. Relies on + // `with_max_len(1).fold()` being equivalent to `map()`. + // + // Take each number from 0 to 32 and fold them by appending to a + // vector. Because of `with_max_len(1)`, this will produce 32 vectors, + // each with one item. We then collect all of these into an + // individual vector by mapping each into their own vector (so we + // have Vec>) and then reducing those into a single + // vector. 
+ let r1 = (0_i32..32) + .into_par_iter() + .with_max_len(1) + .fold( + || vec![], + |mut v, e| { + v.push(e); + v + }, + ) + .map(|v| vec![v]) + .reduce_with(|mut v_a, v_b| { + v_a.extend(v_b); + v_a + }); + assert_eq!( + r1, + Some(vec![ + vec![0], + vec![1], + vec![2], + vec![3], + vec![4], + vec![5], + vec![6], + vec![7], + vec![8], + vec![9], + vec![10], + vec![11], + vec![12], + vec![13], + vec![14], + vec![15], + vec![16], + vec![17], + vec![18], + vec![19], + vec![20], + vec![21], + vec![22], + vec![23], + vec![24], + vec![25], + vec![26], + vec![27], + vec![28], + vec![29], + vec![30], + vec![31] + ]) + ); +} + +#[test] +fn fold_is_full() { + let counter = AtomicUsize::new(0); + let a = (0_i32..2048) + .into_par_iter() + .inspect(|_| { + counter.fetch_add(1, Ordering::SeqCst); + }) + .fold(|| 0, |a, b| a + b) + .find_any(|_| true); + assert!(a.is_some()); + assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one +} + +#[test] +fn check_step_by() { + let a: Vec = (0..1024).step_by(2).collect(); + let b: Vec = (0..1024).into_par_iter().step_by(2).collect(); + + assert_eq!(a, b); +} + +#[test] +fn check_step_by_unaligned() { + let a: Vec = (0..1029).step_by(10).collect(); + let b: Vec = (0..1029).into_par_iter().step_by(10).collect(); + + assert_eq!(a, b) +} + +#[test] +fn check_step_by_rev() { + let a: Vec = (0..1024).step_by(2).rev().collect(); + let b: Vec = (0..1024).into_par_iter().step_by(2).rev().collect(); + + assert_eq!(a, b); +} + +#[test] +fn check_enumerate() { + let a: Vec = (0..1024).rev().collect(); + + let mut b = vec![]; + a.par_iter() + .enumerate() + .map(|(i, &x)| i + x) + .collect_into_vec(&mut b); + assert!(b.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_enumerate_rev() { + let a: Vec = (0..1024).rev().collect(); + + let mut b = vec![]; + a.par_iter() + .enumerate() + .rev() + .map(|(i, &x)| i + x) + .collect_into_vec(&mut b); + assert!(b.iter().all(|&x| x == a.len() - 1)); +} + +#[test] 
+fn check_indices_after_enumerate_split() { + let a: Vec = (0..1024).collect(); + a.par_iter().enumerate().with_producer(WithProducer); + + struct WithProducer; + impl<'a> ProducerCallback<(usize, &'a i32)> for WithProducer { + type Output = (); + fn callback

(self, producer: P) + where + P: Producer, + { + let (a, b) = producer.split_at(512); + for ((index, value), trusted_index) in a.into_iter().zip(0..) { + assert_eq!(index, trusted_index); + assert_eq!(index, *value as usize); + } + for ((index, value), trusted_index) in b.into_iter().zip(512..) { + assert_eq!(index, trusted_index); + assert_eq!(index, *value as usize); + } + } + } +} + +#[test] +fn check_increment() { + let mut a: Vec = (0..1024).rev().collect(); + + a.par_iter_mut().enumerate().for_each(|(i, v)| *v += i); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_skip() { + let a: Vec = (0..1024).collect(); + + let mut v1 = Vec::new(); + a.par_iter().skip(16).collect_into_vec(&mut v1); + let v2 = a.iter().skip(16).collect::>(); + assert_eq!(v1, v2); + + let mut v1 = Vec::new(); + a.par_iter().skip(2048).collect_into_vec(&mut v1); + let v2 = a.iter().skip(2048).collect::>(); + assert_eq!(v1, v2); + + let mut v1 = Vec::new(); + a.par_iter().skip(0).collect_into_vec(&mut v1); + let v2 = a.iter().skip(0).collect::>(); + assert_eq!(v1, v2); + + // Check that the skipped elements side effects are executed + use std::sync::atomic::{AtomicUsize, Ordering}; + let num = AtomicUsize::new(0); + a.par_iter() + .map(|&n| num.fetch_add(n, Ordering::Relaxed)) + .skip(512) + .count(); + assert_eq!(num.load(Ordering::Relaxed), a.iter().sum::()); +} + +#[test] +fn check_take() { + let a: Vec = (0..1024).collect(); + + let mut v1 = Vec::new(); + a.par_iter().take(16).collect_into_vec(&mut v1); + let v2 = a.iter().take(16).collect::>(); + assert_eq!(v1, v2); + + let mut v1 = Vec::new(); + a.par_iter().take(2048).collect_into_vec(&mut v1); + let v2 = a.iter().take(2048).collect::>(); + assert_eq!(v1, v2); + + let mut v1 = Vec::new(); + a.par_iter().take(0).collect_into_vec(&mut v1); + let v2 = a.iter().take(0).collect::>(); + assert_eq!(v1, v2); +} + +#[test] +fn check_inspect() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + let a = 
AtomicUsize::new(0); + let b: usize = (0_usize..1024) + .into_par_iter() + .inspect(|&i| { + a.fetch_add(i, Ordering::Relaxed); + }) + .sum(); + + assert_eq!(a.load(Ordering::Relaxed), b); +} + +#[test] +fn check_move() { + let a = vec![vec![1, 2, 3]]; + let ptr = a[0].as_ptr(); + + let mut b = vec![]; + a.into_par_iter().collect_into_vec(&mut b); + + // a simple move means the inner vec will be completely unchanged + assert_eq!(ptr, b[0].as_ptr()); +} + +#[test] +fn check_drops() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + let c = AtomicUsize::new(0); + let a = vec![DropCounter(&c); 10]; + + let mut b = vec![]; + a.clone().into_par_iter().collect_into_vec(&mut b); + assert_eq!(c.load(Ordering::Relaxed), 0); + + b.into_par_iter(); + assert_eq!(c.load(Ordering::Relaxed), 10); + + a.into_par_iter().with_producer(Partial); + assert_eq!(c.load(Ordering::Relaxed), 20); + + #[derive(Clone)] + struct DropCounter<'a>(&'a AtomicUsize); + impl<'a> Drop for DropCounter<'a> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::Relaxed); + } + } + + struct Partial; + impl<'a> ProducerCallback> for Partial { + type Output = (); + fn callback

(self, producer: P) + where + P: Producer>, + { + let (a, _) = producer.split_at(5); + a.into_iter().next(); + } + } +} + +#[test] +fn check_slice_indexed() { + let a = vec![1, 2, 3]; + is_indexed(a.par_iter()); +} + +#[test] +fn check_slice_mut_indexed() { + let mut a = vec![1, 2, 3]; + is_indexed(a.par_iter_mut()); +} + +#[test] +fn check_vec_indexed() { + let a = vec![1, 2, 3]; + is_indexed(a.clone().into_par_iter()); +} + +#[test] +fn check_range_indexed() { + is_indexed((1..5).into_par_iter()); +} + +#[test] +fn check_cmp_direct() { + let a = (0..1024).into_par_iter(); + let b = (0..1024).into_par_iter(); + + let result = a.cmp(b); + + assert!(result == ::std::cmp::Ordering::Equal); +} + +#[test] +fn check_cmp_to_seq() { + assert_eq!( + (0..1024).into_par_iter().cmp(0..1024), + (0..1024).cmp(0..1024) + ); +} + +#[test] +fn check_cmp_rng_to_seq() { + let mut rng = seeded_rng(); + let rng = &mut rng; + let a: Vec = rng.sample_iter(&Standard).take(1024).collect(); + let b: Vec = rng.sample_iter(&Standard).take(1024).collect(); + for i in 0..a.len() { + let par_result = a[i..].par_iter().cmp(b[i..].par_iter()); + let seq_result = a[i..].iter().cmp(b[i..].iter()); + + assert_eq!(par_result, seq_result); + } +} + +#[test] +fn check_cmp_lt_direct() { + let a = (0..1024).into_par_iter(); + let b = (1..1024).into_par_iter(); + + let result = a.cmp(b); + + assert!(result == ::std::cmp::Ordering::Less); +} + +#[test] +fn check_cmp_lt_to_seq() { + assert_eq!( + (0..1024).into_par_iter().cmp(1..1024), + (0..1024).cmp(1..1024) + ) +} + +#[test] +fn check_cmp_gt_direct() { + let a = (1..1024).into_par_iter(); + let b = (0..1024).into_par_iter(); + + let result = a.cmp(b); + + assert!(result == ::std::cmp::Ordering::Greater); +} + +#[test] +fn check_cmp_gt_to_seq() { + assert_eq!( + (1..1024).into_par_iter().cmp(0..1024), + (1..1024).cmp(0..1024) + ) +} + +#[test] +fn check_cmp_short_circuit() { + // We only use a single thread in order to make the short-circuit behavior 
deterministic. + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + let a = vec![0; 1024]; + let mut b = a.clone(); + b[42] = 1; + + pool.install(|| { + let expected = ::std::cmp::Ordering::Less; + assert_eq!(a.par_iter().cmp(&b), expected); + + for len in 1..10 { + let counter = AtomicUsize::new(0); + let result = a + .par_iter() + .with_max_len(len) + .inspect(|_| { + counter.fetch_add(1, Ordering::SeqCst); + }) + .cmp(&b); + assert_eq!(result, expected); + // should not have visited every single one + assert!(counter.into_inner() < a.len()); + } + }); +} + +#[test] +fn check_partial_cmp_short_circuit() { + // We only use a single thread to make the short-circuit behavior deterministic. + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + let a = vec![0; 1024]; + let mut b = a.clone(); + b[42] = 1; + + pool.install(|| { + let expected = Some(::std::cmp::Ordering::Less); + assert_eq!(a.par_iter().partial_cmp(&b), expected); + + for len in 1..10 { + let counter = AtomicUsize::new(0); + let result = a + .par_iter() + .with_max_len(len) + .inspect(|_| { + counter.fetch_add(1, Ordering::SeqCst); + }) + .partial_cmp(&b); + assert_eq!(result, expected); + // should not have visited every single one + assert!(counter.into_inner() < a.len()); + } + }); +} + +#[test] +fn check_partial_cmp_nan_short_circuit() { + // We only use a single thread to make the short-circuit behavior deterministic. 
+ let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + + let a = vec![0.0; 1024]; + let mut b = a.clone(); + b[42] = f64::NAN; + + pool.install(|| { + let expected = None; + assert_eq!(a.par_iter().partial_cmp(&b), expected); + + for len in 1..10 { + let counter = AtomicUsize::new(0); + let result = a + .par_iter() + .with_max_len(len) + .inspect(|_| { + counter.fetch_add(1, Ordering::SeqCst); + }) + .partial_cmp(&b); + assert_eq!(result, expected); + // should not have visited every single one + assert!(counter.into_inner() < a.len()); + } + }); +} + +#[test] +fn check_partial_cmp_direct() { + let a = (0..1024).into_par_iter(); + let b = (0..1024).into_par_iter(); + + let result = a.partial_cmp(b); + + assert!(result == Some(::std::cmp::Ordering::Equal)); +} + +#[test] +fn check_partial_cmp_to_seq() { + let par_result = (0..1024).into_par_iter().partial_cmp(0..1024); + let seq_result = (0..1024).partial_cmp(0..1024); + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_partial_cmp_rng_to_seq() { + let mut rng = seeded_rng(); + let rng = &mut rng; + let a: Vec = rng.sample_iter(&Standard).take(1024).collect(); + let b: Vec = rng.sample_iter(&Standard).take(1024).collect(); + for i in 0..a.len() { + let par_result = a[i..].par_iter().partial_cmp(b[i..].par_iter()); + let seq_result = a[i..].iter().partial_cmp(b[i..].iter()); + + assert_eq!(par_result, seq_result); + } +} + +#[test] +fn check_partial_cmp_lt_direct() { + let a = (0..1024).into_par_iter(); + let b = (1..1024).into_par_iter(); + + let result = a.partial_cmp(b); + + assert!(result == Some(::std::cmp::Ordering::Less)); +} + +#[test] +fn check_partial_cmp_lt_to_seq() { + let par_result = (0..1024).into_par_iter().partial_cmp(1..1024); + let seq_result = (0..1024).partial_cmp(1..1024); + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_partial_cmp_gt_direct() { + let a = (1..1024).into_par_iter(); + let b = (0..1024).into_par_iter(); + + let result = a.partial_cmp(b); + 
+ assert!(result == Some(::std::cmp::Ordering::Greater)); +} + +#[test] +fn check_partial_cmp_gt_to_seq() { + let par_result = (1..1024).into_par_iter().partial_cmp(0..1024); + let seq_result = (1..1024).partial_cmp(0..1024); + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_partial_cmp_none_direct() { + let a = vec![f64::NAN, 0.0]; + let b = vec![0.0, 1.0]; + + let result = a.par_iter().partial_cmp(b.par_iter()); + + assert!(result == None); +} + +#[test] +fn check_partial_cmp_none_to_seq() { + let a = vec![f64::NAN, 0.0]; + let b = vec![0.0, 1.0]; + + let par_result = a.par_iter().partial_cmp(b.par_iter()); + let seq_result = a.iter().partial_cmp(b.iter()); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_partial_cmp_late_nan_direct() { + let a = vec![0.0, f64::NAN]; + let b = vec![1.0, 1.0]; + + let result = a.par_iter().partial_cmp(b.par_iter()); + + assert!(result == Some(::std::cmp::Ordering::Less)); +} + +#[test] +fn check_partial_cmp_late_nane_to_seq() { + let a = vec![0.0, f64::NAN]; + let b = vec![1.0, 1.0]; + + let par_result = a.par_iter().partial_cmp(b.par_iter()); + let seq_result = a.iter().partial_cmp(b.iter()); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_cmp_lengths() { + // comparisons should consider length if they are otherwise equal + let a = vec![0; 1024]; + let b = vec![0; 1025]; + + assert_eq!(a.par_iter().cmp(&b), a.iter().cmp(&b)); + assert_eq!(a.par_iter().partial_cmp(&b), a.iter().partial_cmp(&b)); +} + +#[test] +fn check_eq_direct() { + let a = (0..1024).into_par_iter(); + let b = (0..1024).into_par_iter(); + + let result = a.eq(b); + + assert!(result); +} + +#[test] +fn check_eq_to_seq() { + let par_result = (0..1024).into_par_iter().eq((0..1024).into_par_iter()); + let seq_result = (0..1024).eq(0..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_ne_direct() { + let a = (0..1024).into_par_iter(); + let b = (1..1024).into_par_iter(); + + let result = a.ne(b); + + 
assert!(result); +} + +#[test] +fn check_ne_to_seq() { + let par_result = (0..1024).into_par_iter().ne((1..1025).into_par_iter()); + let seq_result = (0..1024).ne(1..1025); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_ne_lengths() { + // equality should consider length too + let a = vec![0; 1024]; + let b = vec![0; 1025]; + + assert_eq!(a.par_iter().eq(&b), a.iter().eq(&b)); + assert_eq!(a.par_iter().ne(&b), a.iter().ne(&b)); +} + +#[test] +fn check_lt_direct() { + assert!((0..1024).into_par_iter().lt(1..1024)); + assert!(!(1..1024).into_par_iter().lt(0..1024)); +} + +#[test] +fn check_lt_to_seq() { + let par_result = (0..1024).into_par_iter().lt((1..1024).into_par_iter()); + let seq_result = (0..1024).lt(1..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_le_equal_direct() { + assert!((0..1024).into_par_iter().le((0..1024).into_par_iter())); +} + +#[test] +fn check_le_equal_to_seq() { + let par_result = (0..1024).into_par_iter().le((0..1024).into_par_iter()); + let seq_result = (0..1024).le(0..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_le_less_direct() { + assert!((0..1024).into_par_iter().le((1..1024).into_par_iter())); +} + +#[test] +fn check_le_less_to_seq() { + let par_result = (0..1024).into_par_iter().le((1..1024).into_par_iter()); + let seq_result = (0..1024).le(1..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_gt_direct() { + assert!((1..1024).into_par_iter().gt((0..1024).into_par_iter())); +} + +#[test] +fn check_gt_to_seq() { + let par_result = (1..1024).into_par_iter().gt((0..1024).into_par_iter()); + let seq_result = (1..1024).gt(0..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_ge_equal_direct() { + assert!((0..1024).into_par_iter().ge((0..1024).into_par_iter())); +} + +#[test] +fn check_ge_equal_to_seq() { + let par_result = (0..1024).into_par_iter().ge((0..1024).into_par_iter()); + let seq_result = (0..1024).ge(0..1024); + + 
assert_eq!(par_result, seq_result); +} + +#[test] +fn check_ge_greater_direct() { + assert!((1..1024).into_par_iter().ge((0..1024).into_par_iter())); +} + +#[test] +fn check_ge_greater_to_seq() { + let par_result = (1..1024).into_par_iter().ge((0..1024).into_par_iter()); + let seq_result = (1..1024).ge(0..1024); + + assert_eq!(par_result, seq_result); +} + +#[test] +fn check_zip() { + let mut a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + a.par_iter_mut().zip(&b[..]).for_each(|(a, &b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_zip_into_par_iter() { + let mut a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + a.par_iter_mut() + .zip(&b) // here we rely on &b iterating over &usize + .for_each(|(a, &b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_zip_into_mut_par_iter() { + let a: Vec = (0..1024).rev().collect(); + let mut b: Vec = (0..1024).collect(); + + a.par_iter().zip(&mut b).for_each(|(&a, b)| *b += a); + + assert!(b.iter().all(|&x| x == b.len() - 1)); +} + +#[test] +fn check_zip_range() { + let mut a: Vec = (0..1024).rev().collect(); + + a.par_iter_mut() + .zip(0usize..1024) + .for_each(|(a, b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_zip_eq() { + let mut a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + a.par_iter_mut().zip_eq(&b[..]).for_each(|(a, &b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_zip_eq_into_par_iter() { + let mut a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + a.par_iter_mut() + .zip_eq(&b) // here we rely on &b iterating over &usize + .for_each(|(a, &b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_zip_eq_into_mut_par_iter() { + let a: Vec = (0..1024).rev().collect(); + let mut b: Vec = (0..1024).collect(); + + 
a.par_iter().zip_eq(&mut b).for_each(|(&a, b)| *b += a); + + assert!(b.iter().all(|&x| x == b.len() - 1)); +} + +#[test] +fn check_zip_eq_range() { + let mut a: Vec = (0..1024).rev().collect(); + + a.par_iter_mut() + .zip_eq(0usize..1024) + .for_each(|(a, b)| *a += b); + + assert!(a.iter().all(|&x| x == a.len() - 1)); +} + +#[test] +fn check_sum_filtered_ints() { + let a: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let par_sum_evens: i32 = a.par_iter().filter(|&x| (x & 1) == 0).sum(); + let seq_sum_evens = a.iter().filter(|&x| (x & 1) == 0).sum(); + assert_eq!(par_sum_evens, seq_sum_evens); +} + +#[test] +fn check_sum_filtermap_ints() { + let a: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let par_sum_evens: u32 = a + .par_iter() + .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None }) + .sum(); + let seq_sum_evens = a + .iter() + .filter_map(|&x| if (x & 1) == 0 { Some(x as u32) } else { None }) + .sum(); + assert_eq!(par_sum_evens, seq_sum_evens); +} + +#[test] +fn check_flat_map_nested_ranges() { + // FIXME -- why are precise type hints required on the integers here? 
+ + let v: i32 = (0_i32..10) + .into_par_iter() + .flat_map(|i| (0_i32..10).into_par_iter().map(move |j| (i, j))) + .map(|(i, j)| i * j) + .sum(); + + let w = (0_i32..10) + .flat_map(|i| (0_i32..10).map(move |j| (i, j))) + .map(|(i, j)| i * j) + .sum(); + + assert_eq!(v, w); +} + +#[test] +fn check_empty_flat_map_sum() { + let a: Vec = (0..1024).collect(); + let empty = &a[..0]; + + // empty on the inside + let b: i32 = a.par_iter().flat_map(|_| empty).sum(); + assert_eq!(b, 0); + + // empty on the outside + let c: i32 = empty.par_iter().flat_map(|_| a.par_iter()).sum(); + assert_eq!(c, 0); +} + +#[test] +fn check_flatten_vec() { + let a: Vec = (0..1024).collect(); + let b: Vec> = vec![a.clone(), a.clone(), a.clone(), a.clone()]; + let c: Vec = b.par_iter().flatten().cloned().collect(); + let mut d = a.clone(); + d.extend(&a); + d.extend(&a); + d.extend(&a); + + assert_eq!(d, c); +} + +#[test] +fn check_flatten_vec_empty() { + let a: Vec> = vec![vec![]]; + let b: Vec = a.par_iter().flatten().cloned().collect(); + + assert_eq!(vec![] as Vec, b); +} + +#[test] +fn check_slice_split() { + let v: Vec<_> = (0..1000).collect(); + for m in 1..100 { + let a: Vec<_> = v.split(|x| x % m == 0).collect(); + let b: Vec<_> = v.par_split(|x| x % m == 0).collect(); + assert_eq!(a, b); + } + + // same as std::slice::split() examples + let slice = [10, 40, 33, 20]; + let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect(); + assert_eq!(v, &[&slice[..2], &slice[3..]]); + + let slice = [10, 40, 33]; + let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect(); + assert_eq!(v, &[&slice[..2], &slice[..0]]); + + let slice = [10, 6, 33, 20]; + let v: Vec<_> = slice.par_split(|num| num % 3 == 0).collect(); + assert_eq!(v, &[&slice[..1], &slice[..0], &slice[3..]]); +} + +#[test] +fn check_slice_split_mut() { + let mut v1: Vec<_> = (0..1000).collect(); + let mut v2 = v1.clone(); + for m in 1..100 { + let a: Vec<_> = v1.split_mut(|x| x % m == 0).collect(); + let b: Vec<_> = 
v2.par_split_mut(|x| x % m == 0).collect(); + assert_eq!(a, b); + } + + // same as std::slice::split_mut() example + let mut v = [10, 40, 30, 20, 60, 50]; + v.par_split_mut(|num| num % 3 == 0).for_each(|group| { + group[0] = 1; + }); + assert_eq!(v, [1, 40, 30, 1, 60, 1]); +} + +#[test] +fn check_chunks() { + let a: Vec = vec![1, 5, 10, 4, 100, 3, 1000, 2, 10000, 1]; + let par_sum_product_pairs: i32 = a.par_chunks(2).map(|c| c.iter().product::()).sum(); + let seq_sum_product_pairs = a.chunks(2).map(|c| c.iter().product::()).sum(); + assert_eq!(par_sum_product_pairs, 12345); + assert_eq!(par_sum_product_pairs, seq_sum_product_pairs); + + let par_sum_product_triples: i32 = a.par_chunks(3).map(|c| c.iter().product::()).sum(); + let seq_sum_product_triples = a.chunks(3).map(|c| c.iter().product::()).sum(); + assert_eq!(par_sum_product_triples, 5_0 + 12_00 + 20_000_000 + 1); + assert_eq!(par_sum_product_triples, seq_sum_product_triples); +} + +#[test] +fn check_chunks_mut() { + let mut a: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let mut b: Vec = a.clone(); + a.par_chunks_mut(2).for_each(|c| c[0] = c.iter().sum()); + b.chunks_mut(2).for_each(|c| c[0] = c.iter().sum()); + assert_eq!(a, &[3, 2, 7, 4, 11, 6, 15, 8, 19, 10]); + assert_eq!(a, b); + + let mut a: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let mut b: Vec = a.clone(); + a.par_chunks_mut(3).for_each(|c| c[0] = c.iter().sum()); + b.chunks_mut(3).for_each(|c| c[0] = c.iter().sum()); + assert_eq!(a, &[6, 2, 3, 15, 5, 6, 24, 8, 9, 10]); + assert_eq!(a, b); +} + +#[test] +fn check_windows() { + let a: Vec = (0..1024).collect(); + let par: Vec<_> = a.par_windows(2).collect(); + let seq: Vec<_> = a.windows(2).collect(); + assert_eq!(par, seq); + + let par: Vec<_> = a.par_windows(100).collect(); + let seq: Vec<_> = a.windows(100).collect(); + assert_eq!(par, seq); + + let par: Vec<_> = a.par_windows(1_000_000).collect(); + let seq: Vec<_> = a.windows(1_000_000).collect(); + assert_eq!(par, seq); + + let par: Vec<_> 
= a + .par_windows(2) + .chain(a.par_windows(1_000_000)) + .zip(a.par_windows(2)) + .collect(); + let seq: Vec<_> = a + .windows(2) + .chain(a.windows(1_000_000)) + .zip(a.windows(2)) + .collect(); + assert_eq!(par, seq); +} + +#[test] +fn check_options() { + let mut a = vec![None, Some(1), None, None, Some(2), Some(4)]; + + assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::()); + assert_eq!(7, a.par_iter().flat_map(|opt| opt).sum::()); + + a.par_iter_mut() + .flat_map(|opt| opt) + .for_each(|x| *x = *x * *x); + + assert_eq!(21, a.into_par_iter().flat_map(|opt| opt).sum::()); +} + +#[test] +fn check_results() { + let mut a = vec![Err(()), Ok(1i32), Err(()), Err(()), Ok(2), Ok(4)]; + + assert_eq!(7, a.par_iter().flat_map(|res| res).sum::()); + + assert_eq!(Err::(()), a.par_iter().cloned().sum()); + assert_eq!(Ok(7), a.par_iter().cloned().filter(Result::is_ok).sum()); + + assert_eq!(Err::(()), a.par_iter().cloned().product()); + assert_eq!(Ok(8), a.par_iter().cloned().filter(Result::is_ok).product()); + + a.par_iter_mut() + .flat_map(|res| res) + .for_each(|x| *x = *x * *x); + + assert_eq!(21, a.into_par_iter().flat_map(|res| res).sum::()); +} + +#[test] +fn check_binary_heap() { + use std::collections::BinaryHeap; + + let a: BinaryHeap = (0..10).collect(); + + assert_eq!(45, a.par_iter().sum::()); + assert_eq!(45, a.into_par_iter().sum::()); +} + +#[test] +fn check_btree_map() { + use std::collections::BTreeMap; + + let mut a: BTreeMap = (0..10).map(|i| (i, -i)).collect(); + + assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::()); + assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::()); + + a.par_iter_mut().for_each(|(k, v)| *v += *k); + + assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::()); +} + +#[test] +fn check_btree_set() { + use std::collections::BTreeSet; + + let a: BTreeSet = (0..10).collect(); + + assert_eq!(45, a.par_iter().sum::()); + assert_eq!(45, a.into_par_iter().sum::()); +} + +#[test] +fn check_hash_map() { + use std::collections::HashMap; + 
+ let mut a: HashMap = (0..10).map(|i| (i, -i)).collect(); + + assert_eq!(45, a.par_iter().map(|(&k, _)| k).sum::()); + assert_eq!(-45, a.par_iter().map(|(_, &v)| v).sum::()); + + a.par_iter_mut().for_each(|(k, v)| *v += *k); + + assert_eq!(0, a.into_par_iter().map(|(_, v)| v).sum::()); +} + +#[test] +fn check_hash_set() { + use std::collections::HashSet; + + let a: HashSet = (0..10).collect(); + + assert_eq!(45, a.par_iter().sum::()); + assert_eq!(45, a.into_par_iter().sum::()); +} + +#[test] +fn check_linked_list() { + use std::collections::LinkedList; + + let mut a: LinkedList = (0..10).collect(); + + assert_eq!(45, a.par_iter().sum::()); + + a.par_iter_mut().for_each(|x| *x = -*x); + + assert_eq!(-45, a.into_par_iter().sum::()); +} + +#[test] +fn check_vec_deque() { + use std::collections::VecDeque; + + let mut a: VecDeque = (0..10).collect(); + + // try to get it to wrap around + a.drain(..5); + a.extend(0..5); + + assert_eq!(45, a.par_iter().sum::()); + + a.par_iter_mut().for_each(|x| *x = -*x); + + assert_eq!(-45, a.into_par_iter().sum::()); +} + +#[test] +fn check_chain() { + let mut res = vec![]; + + // stays indexed in the face of madness + Some(0) + .into_par_iter() + .chain(Ok::<_, ()>(1)) + .chain(1..4) + .chain(Err("huh?")) + .chain(None) + .chain(vec![5, 8, 13]) + .map(|x| (x as u8 + b'a') as char) + .chain(vec!['x', 'y', 'z']) + .zip((0i32..1000).into_par_iter().map(|x| -x)) + .enumerate() + .map(|(a, (b, c))| (a, b, c)) + .chain(None) + .collect_into_vec(&mut res); + + assert_eq!( + res, + vec![ + (0, 'a', 0), + (1, 'b', -1), + (2, 'b', -2), + (3, 'c', -3), + (4, 'd', -4), + (5, 'f', -5), + (6, 'i', -6), + (7, 'n', -7), + (8, 'x', -8), + (9, 'y', -9), + (10, 'z', -10) + ] + ); + + // unindexed is ok too + let res: Vec = Some(1i32) + .into_par_iter() + .chain( + (2i32..4) + .into_par_iter() + .chain(vec![5, 6, 7, 8, 9]) + .chain(Some((10, 100)).into_par_iter().flat_map(|(a, b)| a..b)) + .filter(|x| x & 1 == 1), + ) + .collect(); + let other: Vec = 
(0..100).filter(|x| x & 1 == 1).collect(); + assert_eq!(res, other); + + // chain collect is ok with the "fake" specialization + let res: Vec = Some(1i32).into_par_iter().chain(None).collect(); + assert_eq!(res, &[1]); +} + +#[test] +fn check_count() { + let c0 = (0_u32..24 * 1024).filter(|i| i % 2 == 0).count(); + let c1 = (0_u32..24 * 1024) + .into_par_iter() + .filter(|i| i % 2 == 0) + .count(); + assert_eq!(c0, c1); +} + +#[test] +fn find_any() { + let a: Vec = (0..1024).collect(); + + assert!(a.par_iter().find_any(|&&x| x % 42 == 41).is_some()); + assert_eq!( + a.par_iter().find_any(|&&x| x % 19 == 1 && x % 53 == 0), + Some(&742_i32) + ); + assert_eq!(a.par_iter().find_any(|&&x| x < 0), None); + + assert!(a.par_iter().position_any(|&x| x % 42 == 41).is_some()); + assert_eq!( + a.par_iter().position_any(|&x| x % 19 == 1 && x % 53 == 0), + Some(742_usize) + ); + assert_eq!(a.par_iter().position_any(|&x| x < 0), None); + + assert!(a.par_iter().any(|&x| x > 1000)); + assert!(!a.par_iter().any(|&x| x < 0)); + + assert!(!a.par_iter().all(|&x| x > 1000)); + assert!(a.par_iter().all(|&x| x >= 0)); +} + +#[test] +fn find_first_or_last() { + let a: Vec = (0..1024).collect(); + + assert_eq!(a.par_iter().find_first(|&&x| x % 42 == 41), Some(&41_i32)); + assert_eq!( + a.par_iter().find_first(|&&x| x % 19 == 1 && x % 53 == 0), + Some(&742_i32) + ); + assert_eq!(a.par_iter().find_first(|&&x| x < 0), None); + + assert_eq!( + a.par_iter().position_first(|&x| x % 42 == 41), + Some(41_usize) + ); + assert_eq!( + a.par_iter().position_first(|&x| x % 19 == 1 && x % 53 == 0), + Some(742_usize) + ); + assert_eq!(a.par_iter().position_first(|&x| x < 0), None); + + assert_eq!(a.par_iter().find_last(|&&x| x % 42 == 41), Some(&1007_i32)); + assert_eq!( + a.par_iter().find_last(|&&x| x % 19 == 1 && x % 53 == 0), + Some(&742_i32) + ); + assert_eq!(a.par_iter().find_last(|&&x| x < 0), None); + + assert_eq!( + a.par_iter().position_last(|&x| x % 42 == 41), + Some(1007_usize) + ); + 
assert_eq!( + a.par_iter().position_last(|&x| x % 19 == 1 && x % 53 == 0), + Some(742_usize) + ); + assert_eq!(a.par_iter().position_last(|&x| x < 0), None); +} + +#[test] +fn find_map_first_or_last_or_any() { + let mut a: Vec = vec![]; + + assert!(a.par_iter().find_map_any(half_if_positive).is_none()); + assert!(a.par_iter().find_map_first(half_if_positive).is_none()); + assert!(a.par_iter().find_map_last(half_if_positive).is_none()); + + a = (-1024..-3).collect(); + + assert!(a.par_iter().find_map_any(half_if_positive).is_none()); + assert!(a.par_iter().find_map_first(half_if_positive).is_none()); + assert!(a.par_iter().find_map_last(half_if_positive).is_none()); + + assert!(a.par_iter().find_map_any(half_if_negative).is_some()); + assert_eq!( + a.par_iter().find_map_first(half_if_negative), + Some(-512_i32) + ); + assert_eq!(a.par_iter().find_map_last(half_if_negative), Some(-2_i32)); + + a.append(&mut (2..1025).collect()); + + assert!(a.par_iter().find_map_any(half_if_positive).is_some()); + assert_eq!(a.par_iter().find_map_first(half_if_positive), Some(1_i32)); + assert_eq!(a.par_iter().find_map_last(half_if_positive), Some(512_i32)); + + fn half_if_positive(x: &i32) -> Option { + if *x > 0 { + Some(x / 2) + } else { + None + } + } + + fn half_if_negative(x: &i32) -> Option { + if *x < 0 { + Some(x / 2) + } else { + None + } + } +} + +#[test] +fn check_find_not_present() { + let counter = AtomicUsize::new(0); + let value: Option = (0_i32..2048).into_par_iter().find_any(|&p| { + counter.fetch_add(1, Ordering::SeqCst); + p >= 2048 + }); + assert!(value.is_none()); + assert!(counter.load(Ordering::SeqCst) == 2048); // should have visited every single one +} + +#[test] +fn check_find_is_present() { + let counter = AtomicUsize::new(0); + let value: Option = (0_i32..2048).into_par_iter().find_any(|&p| { + counter.fetch_add(1, Ordering::SeqCst); + p >= 1024 && p < 1096 + }); + let q = value.unwrap(); + assert!(q >= 1024 && q < 1096); + 
assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one +} + +#[test] +fn check_while_some() { + let value = (0_i32..2048).into_par_iter().map(Some).while_some().max(); + assert_eq!(value, Some(2047)); + + let counter = AtomicUsize::new(0); + let value = (0_i32..2048) + .into_par_iter() + .map(|x| { + counter.fetch_add(1, Ordering::SeqCst); + if x < 1024 { + Some(x) + } else { + None + } + }) + .while_some() + .max(); + assert!(value < Some(1024)); + assert!(counter.load(Ordering::SeqCst) < 2048); // should not have visited every single one +} + +#[test] +fn par_iter_collect_option() { + let a: Option> = (0_i32..2048).map(Some).collect(); + let b: Option> = (0_i32..2048).into_par_iter().map(Some).collect(); + assert_eq!(a, b); + + let c: Option> = (0_i32..2048) + .into_par_iter() + .map(|x| if x == 1234 { None } else { Some(x) }) + .collect(); + assert_eq!(c, None); +} + +#[test] +fn par_iter_collect_result() { + let a: Result, ()> = (0_i32..2048).map(Ok).collect(); + let b: Result, ()> = (0_i32..2048).into_par_iter().map(Ok).collect(); + assert_eq!(a, b); + + let c: Result, _> = (0_i32..2048) + .into_par_iter() + .map(|x| if x == 1234 { Err(x) } else { Ok(x) }) + .collect(); + assert_eq!(c, Err(1234)); + + let d: Result, _> = (0_i32..2048) + .into_par_iter() + .map(|x| if x % 100 == 99 { Err(x) } else { Ok(x) }) + .collect(); + assert_eq!(d.map_err(|x| x % 100), Err(99)); +} + +#[test] +fn par_iter_collect() { + let a: Vec = (0..1024).collect(); + let b: Vec = a.par_iter().map(|&i| i + 1).collect(); + let c: Vec = (0..1024).map(|i| i + 1).collect(); + assert_eq!(b, c); +} + +#[test] +fn par_iter_collect_vecdeque() { + let a: Vec = (0..1024).collect(); + let b: VecDeque = a.par_iter().cloned().collect(); + let c: VecDeque = a.iter().cloned().collect(); + assert_eq!(b, c); +} + +#[test] +fn par_iter_collect_binaryheap() { + let a: Vec = (0..1024).collect(); + let mut b: BinaryHeap = a.par_iter().cloned().collect(); + 
assert_eq!(b.peek(), Some(&1023)); + assert_eq!(b.len(), 1024); + for n in (0..1024).rev() { + assert_eq!(b.pop(), Some(n)); + assert_eq!(b.len() as i32, n); + } +} + +#[test] +fn par_iter_collect_hashmap() { + let a: Vec = (0..1024).collect(); + let b: HashMap = a.par_iter().map(|&i| (i, format!("{}", i))).collect(); + assert_eq!(&b[&3], "3"); + assert_eq!(b.len(), 1024); +} + +#[test] +fn par_iter_collect_hashset() { + let a: Vec = (0..1024).collect(); + let b: HashSet = a.par_iter().cloned().collect(); + assert_eq!(b.len(), 1024); +} + +#[test] +fn par_iter_collect_btreemap() { + let a: Vec = (0..1024).collect(); + let b: BTreeMap = a.par_iter().map(|&i| (i, format!("{}", i))).collect(); + assert_eq!(&b[&3], "3"); + assert_eq!(b.len(), 1024); +} + +#[test] +fn par_iter_collect_btreeset() { + let a: Vec = (0..1024).collect(); + let b: BTreeSet = a.par_iter().cloned().collect(); + assert_eq!(b.len(), 1024); +} + +#[test] +fn par_iter_collect_linked_list() { + let a: Vec = (0..1024).collect(); + let b: LinkedList<_> = a.par_iter().map(|&i| (i, format!("{}", i))).collect(); + let c: LinkedList<_> = a.iter().map(|&i| (i, format!("{}", i))).collect(); + assert_eq!(b, c); +} + +#[test] +fn par_iter_collect_linked_list_flat_map_filter() { + let b: LinkedList = (0_i32..1024) + .into_par_iter() + .flat_map(|i| (0..i)) + .filter(|&i| i % 2 == 0) + .collect(); + let c: LinkedList = (0_i32..1024) + .flat_map(|i| (0..i)) + .filter(|&i| i % 2 == 0) + .collect(); + assert_eq!(b, c); +} + +#[test] +fn par_iter_collect_cows() { + use std::borrow::Cow; + + let s = "Fearless Concurrency with Rust"; + + // Collects `i32` into a `Vec` + let a: Cow<'_, [i32]> = (0..1024).collect(); + let b: Cow<'_, [i32]> = a.par_iter().cloned().collect(); + assert_eq!(a, b); + + // Collects `char` into a `String` + let a: Cow<'_, str> = s.chars().collect(); + let b: Cow<'_, str> = s.par_chars().collect(); + assert_eq!(a, b); + + // Collects `str` into a `String` + let a: Cow<'_, str> = 
s.split_whitespace().collect(); + let b: Cow<'_, str> = s.par_split_whitespace().collect(); + assert_eq!(a, b); + + // Collects `String` into a `String` + let a: Cow<'_, str> = s.split_whitespace().map(str::to_owned).collect(); + let b: Cow<'_, str> = s.par_split_whitespace().map(str::to_owned).collect(); + assert_eq!(a, b); +} + +#[test] +fn par_iter_unindexed_flat_map() { + let b: Vec = (0_i64..1024).into_par_iter().flat_map(Some).collect(); + let c: Vec = (0_i64..1024).flat_map(Some).collect(); + assert_eq!(b, c); +} + +#[test] +fn min_max() { + let rng = seeded_rng(); + let a: Vec = rng.sample_iter(&Standard).take(1024).collect(); + for i in 0..=a.len() { + let slice = &a[..i]; + assert_eq!(slice.par_iter().min(), slice.iter().min()); + assert_eq!(slice.par_iter().max(), slice.iter().max()); + } +} + +#[test] +fn min_max_by() { + let rng = seeded_rng(); + // Make sure there are duplicate keys, for testing sort stability + let r: Vec = rng.sample_iter(&Standard).take(512).collect(); + let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect(); + for i in 0..=a.len() { + let slice = &a[..i]; + assert_eq!( + slice.par_iter().min_by(|x, y| x.0.cmp(&y.0)), + slice.iter().min_by(|x, y| x.0.cmp(&y.0)) + ); + assert_eq!( + slice.par_iter().max_by(|x, y| x.0.cmp(&y.0)), + slice.iter().max_by(|x, y| x.0.cmp(&y.0)) + ); + } +} + +#[test] +fn min_max_by_key() { + let rng = seeded_rng(); + // Make sure there are duplicate keys, for testing sort stability + let r: Vec = rng.sample_iter(&Standard).take(512).collect(); + let a: Vec<(i32, u16)> = r.iter().chain(&r).cloned().zip(0..).collect(); + for i in 0..=a.len() { + let slice = &a[..i]; + assert_eq!( + slice.par_iter().min_by_key(|x| x.0), + slice.iter().min_by_key(|x| x.0) + ); + assert_eq!( + slice.par_iter().max_by_key(|x| x.0), + slice.iter().max_by_key(|x| x.0) + ); + } +} + +#[test] +fn check_rev() { + let a: Vec = (0..1024).rev().collect(); + let b: Vec = (0..1024).collect(); + + 
assert!(a.par_iter().rev().zip(b).all(|(&a, b)| a == b)); +} + +#[test] +fn scope_mix() { + let counter_p = &AtomicUsize::new(0); + scope(|s| { + s.spawn(move |s| { + divide_and_conquer(s, counter_p, 1024); + }); + s.spawn(move |_| { + let a: Vec = (0..1024).collect(); + let r1 = a.par_iter().map(|&i| i + 1).reduce_with(|i, j| i + j); + let r2 = a.iter().map(|&i| i + 1).sum(); + assert_eq!(r1.unwrap(), r2); + }); + }); +} + +fn divide_and_conquer<'scope>(scope: &Scope<'scope>, counter: &'scope AtomicUsize, size: usize) { + if size > 1 { + scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); + scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); + } else { + // count the leaves + counter.fetch_add(1, Ordering::SeqCst); + } +} + +#[test] +fn check_split() { + use std::ops::Range; + + let a = (0..1024).into_par_iter(); + + let b = split(0..1024, |Range { start, end }| { + let mid = (end - start) / 2; + if mid > start { + (start..mid, Some(mid..end)) + } else { + (start..end, None) + } + }) + .flat_map(|range| range); + + assert_eq!(a.collect::>(), b.collect::>()); +} + +#[test] +fn check_lengths() { + fn check(min: usize, max: usize) { + let range = 0..1024 * 1024; + + // Check against normalized values. 
+ let min_check = cmp::min(cmp::max(min, 1), range.len()); + let max_check = cmp::max(max, min_check.saturating_add(min_check - 1)); + + assert!( + range + .into_par_iter() + .with_min_len(min) + .with_max_len(max) + .fold(|| 0, |count, _| count + 1) + .all(|c| c >= min_check && c <= max_check), + "check_lengths failed {:?} -> {:?} ", + (min, max), + (min_check, max_check) + ); + } + + let lengths = [0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, usize::MAX]; + for &min in &lengths { + for &max in &lengths { + check(min, max); + } + } +} + +#[test] +fn check_map_with() { + let (sender, receiver) = mpsc::channel(); + let a: HashSet<_> = (0..1024).collect(); + + a.par_iter() + .cloned() + .map_with(sender, |s, i| s.send(i).unwrap()) + .count(); + + let b: HashSet<_> = receiver.iter().collect(); + assert_eq!(a, b); +} + +#[test] +fn check_fold_with() { + let (sender, receiver) = mpsc::channel(); + let a: HashSet<_> = (0..1024).collect(); + + a.par_iter() + .cloned() + .fold_with(sender, |s, i| { + s.send(i).unwrap(); + s + }) + .count(); + + let b: HashSet<_> = receiver.iter().collect(); + assert_eq!(a, b); +} + +#[test] +fn check_for_each_with() { + let (sender, receiver) = mpsc::channel(); + let a: HashSet<_> = (0..1024).collect(); + + a.par_iter() + .cloned() + .for_each_with(sender, |s, i| s.send(i).unwrap()); + + let b: HashSet<_> = receiver.iter().collect(); + assert_eq!(a, b); +} + +#[test] +fn check_extend_items() { + fn check() + where + C: Default + + Eq + + Debug + + Extend + + for<'a> Extend<&'a i32> + + ParallelExtend + + for<'a> ParallelExtend<&'a i32>, + { + let mut serial = C::default(); + let mut parallel = C::default(); + + // extend with references + let v: Vec<_> = (0..128).collect(); + serial.extend(&v); + parallel.par_extend(&v); + assert_eq!(serial, parallel); + + // extend with values + serial.extend(-128..0); + parallel.par_extend(-128..0); + assert_eq!(serial, parallel); + } + + check::>(); + check::>(); + check::>(); + check::>(); + 
check::>(); +} + +#[test] +fn check_extend_heap() { + let mut serial: BinaryHeap<_> = Default::default(); + let mut parallel: BinaryHeap<_> = Default::default(); + + // extend with references + let v: Vec<_> = (0..128).collect(); + serial.extend(&v); + parallel.par_extend(&v); + assert_eq!( + serial.clone().into_sorted_vec(), + parallel.clone().into_sorted_vec() + ); + + // extend with values + serial.extend(-128..0); + parallel.par_extend(-128..0); + assert_eq!(serial.into_sorted_vec(), parallel.into_sorted_vec()); +} + +#[test] +fn check_extend_pairs() { + fn check() + where + C: Default + + Eq + + Debug + + Extend<(usize, i32)> + + for<'a> Extend<(&'a usize, &'a i32)> + + ParallelExtend<(usize, i32)> + + for<'a> ParallelExtend<(&'a usize, &'a i32)>, + { + let mut serial = C::default(); + let mut parallel = C::default(); + + // extend with references + let m: HashMap<_, _> = (0..128).enumerate().collect(); + serial.extend(&m); + parallel.par_extend(&m); + assert_eq!(serial, parallel); + + // extend with values + let v: Vec<(_, _)> = (-128..0).enumerate().collect(); + serial.extend(v.clone()); + parallel.par_extend(v); + assert_eq!(serial, parallel); + } + + check::>(); + check::>(); +} + +#[test] +fn check_unzip_into_vecs() { + let mut a = vec![]; + let mut b = vec![]; + (0..1024) + .into_par_iter() + .map(|i| i * i) + .enumerate() + .unzip_into_vecs(&mut a, &mut b); + + let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip(); + assert_eq!(a, c); + assert_eq!(b, d); +} + +#[test] +fn check_unzip() { + // indexed, unindexed + let (a, b): (Vec<_>, HashSet<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip(); + let (c, d): (Vec<_>, HashSet<_>) = (0..1024).map(|i| i * i).enumerate().unzip(); + assert_eq!(a, c); + assert_eq!(b, d); + + // unindexed, indexed + let (a, b): (HashSet<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip(); + let (c, d): (HashSet<_>, Vec<_>) = (0..1024).map(|i| i * 
i).enumerate().unzip(); + assert_eq!(a, c); + assert_eq!(b, d); + + // indexed, indexed + let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().map(|i| i * i).enumerate().unzip(); + let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| i * i).enumerate().unzip(); + assert_eq!(a, c); + assert_eq!(b, d); + + // unindexed producer + let (a, b): (Vec<_>, Vec<_>) = (0..1024) + .into_par_iter() + .filter_map(|i| Some((i, i * i))) + .unzip(); + let (c, d): (Vec<_>, Vec<_>) = (0..1024).map(|i| (i, i * i)).unzip(); + assert_eq!(a, c); + assert_eq!(b, d); +} + +#[test] +fn check_partition() { + let (a, b): (Vec<_>, Vec<_>) = (0..1024).into_par_iter().partition(|&i| i % 3 == 0); + let (c, d): (Vec<_>, Vec<_>) = (0..1024).partition(|&i| i % 3 == 0); + assert_eq!(a, c); + assert_eq!(b, d); +} + +#[test] +fn check_partition_map() { + let input = "a b c 1 2 3 x y z"; + let (a, b): (Vec<_>, String) = + input + .par_split_whitespace() + .partition_map(|s| match s.parse::() { + Ok(n) => Either::Left(n), + Err(_) => Either::Right(s), + }); + assert_eq!(a, vec![1, 2, 3]); + assert_eq!(b, "abcxyz"); +} + +#[test] +fn check_either() { + type I = crate::vec::IntoIter; + type E = Either; + + let v: Vec = (0..1024).collect(); + + // try iterating the left side + let left: E = Either::Left(v.clone().into_par_iter()); + assert!(left.eq(v.clone())); + + // try iterating the right side + let right: E = Either::Right(v.clone().into_par_iter()); + assert!(right.eq(v.clone())); + + // try an indexed iterator + let left: E = Either::Left(v.clone().into_par_iter()); + assert!(left.enumerate().eq(v.clone().into_par_iter().enumerate())); +} + +#[test] +fn check_either_extend() { + type E = Either, HashSet>; + + let v: Vec = (0..1024).collect(); + + // try extending the left side + let mut left: E = Either::Left(vec![]); + left.par_extend(v.clone()); + assert_eq!(left.as_ref(), Either::Left(&v)); + + // try extending the right side + let mut right: E = Either::Right(HashSet::default()); + 
right.par_extend(v.clone()); + assert_eq!(right, Either::Right(v.iter().cloned().collect())); +} + +#[test] +fn check_interleave_eq() { + let xs: Vec = (0..10).collect(); + let ys: Vec = (10..20).collect(); + + let mut actual = vec![]; + xs.par_iter() + .interleave(&ys) + .map(|&i| i) + .collect_into_vec(&mut actual); + + let expected: Vec = (0..10) + .zip(10..20) + .flat_map(|(i, j)| vec![i, j].into_iter()) + .collect(); + assert_eq!(expected, actual); +} + +#[test] +fn check_interleave_uneven() { + let cases: Vec<(Vec, Vec, Vec)> = vec![ + ( + (0..9).collect(), + vec![10], + vec![0, 10, 1, 2, 3, 4, 5, 6, 7, 8], + ), + ( + vec![10], + (0..9).collect(), + vec![10, 0, 1, 2, 3, 4, 5, 6, 7, 8], + ), + ( + (0..5).collect(), + (5..10).collect(), + (0..5) + .zip(5..10) + .flat_map(|(i, j)| vec![i, j].into_iter()) + .collect(), + ), + (vec![], (0..9).collect(), (0..9).collect()), + ((0..9).collect(), vec![], (0..9).collect()), + ( + (0..50).collect(), + (50..100).collect(), + (0..50) + .zip(50..100) + .flat_map(|(i, j)| vec![i, j].into_iter()) + .collect(), + ), + ]; + + for (i, (xs, ys, expected)) in cases.into_iter().enumerate() { + let mut res = vec![]; + xs.par_iter() + .interleave(&ys) + .map(|&i| i) + .collect_into_vec(&mut res); + assert_eq!(expected, res, "Case {} failed", i); + + res.truncate(0); + xs.par_iter() + .interleave(&ys) + .rev() + .map(|&i| i) + .collect_into_vec(&mut res); + assert_eq!( + expected.into_iter().rev().collect::>(), + res, + "Case {} reversed failed", + i + ); + } +} + +#[test] +fn check_interleave_shortest() { + let cases: Vec<(Vec, Vec, Vec)> = vec![ + ((0..9).collect(), vec![10], vec![0, 10, 1]), + (vec![10], (0..9).collect(), vec![10, 0]), + ( + (0..5).collect(), + (5..10).collect(), + (0..5) + .zip(5..10) + .flat_map(|(i, j)| vec![i, j].into_iter()) + .collect(), + ), + (vec![], (0..9).collect(), vec![]), + ((0..9).collect(), vec![], vec![0]), + ( + (0..50).collect(), + (50..100).collect(), + (0..50) + .zip(50..100) + .flat_map(|(i, 
j)| vec![i, j].into_iter()) + .collect(), + ), + ]; + + for (i, (xs, ys, expected)) in cases.into_iter().enumerate() { + let mut res = vec![]; + xs.par_iter() + .interleave_shortest(&ys) + .map(|&i| i) + .collect_into_vec(&mut res); + assert_eq!(expected, res, "Case {} failed", i); + + res.truncate(0); + xs.par_iter() + .interleave_shortest(&ys) + .rev() + .map(|&i| i) + .collect_into_vec(&mut res); + assert_eq!( + expected.into_iter().rev().collect::>(), + res, + "Case {} reversed failed", + i + ); + } +} + +#[test] +#[should_panic(expected = "chunk_size must not be zero")] +fn check_chunks_zero_size() { + let _: Vec> = vec![1, 2, 3].into_par_iter().chunks(0).collect(); +} + +#[test] +fn check_chunks_even_size() { + assert_eq!( + vec![vec![1, 2, 3], vec![4, 5, 6], vec![7, 8, 9]], + (1..10).into_par_iter().chunks(3).collect::>>() + ); +} + +#[test] +fn check_chunks_empty() { + let v: Vec = vec![]; + let expected: Vec> = vec![]; + assert_eq!( + expected, + v.into_par_iter().chunks(2).collect::>>() + ); +} + +#[test] +fn check_chunks_len() { + assert_eq!(4, (0..8).into_par_iter().chunks(2).len()); + assert_eq!(3, (0..9).into_par_iter().chunks(3).len()); + assert_eq!(3, (0..8).into_par_iter().chunks(3).len()); + assert_eq!(1, (&[1]).par_iter().chunks(3).len()); + assert_eq!(0, (0..0).into_par_iter().chunks(3).len()); +} + +#[test] +fn check_chunks_uneven() { + let cases: Vec<(Vec, usize, Vec>)> = vec![ + ((0..5).collect(), 3, vec![vec![0, 1, 2], vec![3, 4]]), + (vec![1], 5, vec![vec![1]]), + ((0..4).collect(), 3, vec![vec![0, 1, 2], vec![3]]), + ]; + + for (i, (v, n, expected)) in cases.into_iter().enumerate() { + let mut res: Vec> = vec![]; + v.par_iter() + .chunks(n) + .map(|v| v.into_iter().cloned().collect()) + .collect_into_vec(&mut res); + assert_eq!(expected, res, "Case {} failed", i); + + res.truncate(0); + v.into_par_iter().chunks(n).rev().collect_into_vec(&mut res); + assert_eq!( + expected.into_iter().rev().collect::>>(), + res, + "Case {} reversed failed", 
+ i + ); + } +} + +#[test] +#[ignore] // it's quick enough on optimized 32-bit platforms, but otherwise... ... ... +#[should_panic(expected = "overflow")] +#[cfg(debug_assertions)] +fn check_repeat_unbounded() { + // use just one thread, so we don't get infinite adaptive splitting + // (forever stealing and re-splitting jobs that will panic on overflow) + let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap(); + pool.install(|| { + println!("counted {} repeats", repeat(()).count()); + }); +} + +#[test] +fn check_repeat_find_any() { + let even = repeat(4).find_any(|&x| x % 2 == 0); + assert_eq!(even, Some(4)); +} + +#[test] +fn check_repeat_take() { + let v: Vec<_> = repeat(4).take(4).collect(); + assert_eq!(v, [4, 4, 4, 4]); +} + +#[test] +fn check_repeat_zip() { + let v = vec![4, 4, 4, 4]; + let mut fours: Vec<_> = repeat(4).zip(v).collect(); + assert_eq!(fours.len(), 4); + while let Some(item) = fours.pop() { + assert_eq!(item, (4, 4)); + } +} + +#[test] +fn check_repeatn_zip_left() { + let v = vec![4, 4, 4, 4]; + let mut fours: Vec<_> = repeatn(4, usize::MAX).zip(v).collect(); + assert_eq!(fours.len(), 4); + while let Some(item) = fours.pop() { + assert_eq!(item, (4, 4)); + } +} + +#[test] +fn check_repeatn_zip_right() { + let v = vec![4, 4, 4, 4]; + let mut fours: Vec<_> = v.into_par_iter().zip(repeatn(4, usize::MAX)).collect(); + assert_eq!(fours.len(), 4); + while let Some(item) = fours.pop() { + assert_eq!(item, (4, 4)); + } +} + +#[test] +fn check_empty() { + // drive_unindexed + let mut v: Vec = empty().filter(|_| unreachable!()).collect(); + assert!(v.is_empty()); + + // drive (indexed) + empty().collect_into_vec(&mut v); + assert!(v.is_empty()); + + // with_producer + let v: Vec<(i32, i32)> = empty().zip(1..10).collect(); + assert!(v.is_empty()); +} + +#[test] +fn check_once() { + // drive_unindexed + let mut v: Vec = once(42).filter(|_| true).collect(); + assert_eq!(v, &[42]); + + // drive (indexed) + once(42).collect_into_vec(&mut v); + 
assert_eq!(v, &[42]); + + // with_producer + let v: Vec<(i32, i32)> = once(42).zip(1..10).collect(); + assert_eq!(v, &[(42, 1)]); +} + +#[test] +fn check_update() { + let mut v: Vec> = vec![vec![1], vec![3, 2, 1]]; + v.par_iter_mut().update(|v| v.push(0)).for_each(|_| ()); + + assert_eq!(v, vec![vec![1, 0], vec![3, 2, 1, 0]]); +} diff --git a/src/iter/try_fold.rs b/src/iter/try_fold.rs new file mode 100644 index 0000000..c1a57a4 --- /dev/null +++ b/src/iter/try_fold.rs @@ -0,0 +1,294 @@ +use super::plumbing::*; +use super::*; + +use super::private::Try; +use std::fmt::{self, Debug}; +use std::marker::PhantomData; + +impl TryFold +where + I: ParallelIterator, + F: Fn(U::Ok, I::Item) -> U + Sync + Send, + ID: Fn() -> U::Ok + Sync + Send, + U: Try + Send, +{ + pub(super) fn new(base: I, identity: ID, fold_op: F) -> Self { + TryFold { + base, + identity, + fold_op, + marker: PhantomData, + } + } +} + +/// `TryFold` is an iterator that applies a function over an iterator producing a single value. 
+/// This struct is created by the [`try_fold()`] method on [`ParallelIterator`] +/// +/// [`try_fold()`]: trait.ParallelIterator.html#method.try_fold +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct TryFold { + base: I, + identity: ID, + fold_op: F, + marker: PhantomData, +} + +impl Debug for TryFold { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TryFold").field("base", &self.base).finish() + } +} + +impl ParallelIterator for TryFold +where + I: ParallelIterator, + F: Fn(U::Ok, I::Item) -> U + Sync + Send, + ID: Fn() -> U::Ok + Sync + Send, + U: Try + Send, +{ + type Item = U; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = TryFoldConsumer { + base: consumer, + identity: &self.identity, + fold_op: &self.fold_op, + marker: PhantomData, + }; + self.base.drive_unindexed(consumer1) + } +} + +struct TryFoldConsumer<'c, U, C, ID, F> { + base: C, + identity: &'c ID, + fold_op: &'c F, + marker: PhantomData, +} + +impl<'r, U, T, C, ID, F> Consumer for TryFoldConsumer<'r, U, C, ID, F> +where + C: Consumer, + F: Fn(U::Ok, T) -> U + Sync, + ID: Fn() -> U::Ok + Sync, + U: Try + Send, +{ + type Folder = TryFoldFolder<'r, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + TryFoldConsumer { base: left, ..self }, + TryFoldConsumer { + base: right, + ..self + }, + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + TryFoldFolder { + base: self.base.into_folder(), + result: Ok((self.identity)()), + fold_op: self.fold_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'r, U, T, C, ID, F> UnindexedConsumer for TryFoldConsumer<'r, U, C, ID, F> +where + C: UnindexedConsumer, + F: Fn(U::Ok, T) -> U 
+ Sync, + ID: Fn() -> U::Ok + Sync, + U: Try + Send, +{ + fn split_off_left(&self) -> Self { + TryFoldConsumer { + base: self.base.split_off_left(), + ..*self + } + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct TryFoldFolder<'r, C, U: Try, F> { + base: C, + fold_op: &'r F, + result: Result, +} + +impl<'r, C, U, F, T> Folder for TryFoldFolder<'r, C, U, F> +where + C: Folder, + F: Fn(U::Ok, T) -> U + Sync, + U: Try, +{ + type Result = C::Result; + + fn consume(mut self, item: T) -> Self { + let fold_op = self.fold_op; + if let Ok(acc) = self.result { + self.result = fold_op(acc, item).into_result(); + } + self + } + + fn complete(self) -> C::Result { + let item = match self.result { + Ok(ok) => U::from_ok(ok), + Err(error) => U::from_error(error), + }; + self.base.consume(item).complete() + } + + fn full(&self) -> bool { + self.result.is_err() || self.base.full() + } +} + +// /////////////////////////////////////////////////////////////////////////// + +impl TryFoldWith +where + I: ParallelIterator, + F: Fn(U::Ok, I::Item) -> U + Sync, + U: Try + Send, + U::Ok: Clone + Send, +{ + pub(super) fn new(base: I, item: U::Ok, fold_op: F) -> Self { + TryFoldWith { + base, + item, + fold_op, + } + } +} + +/// `TryFoldWith` is an iterator that applies a function over an iterator producing a single value. 
+/// This struct is created by the [`try_fold_with()`] method on [`ParallelIterator`] +/// +/// [`try_fold_with()`]: trait.ParallelIterator.html#method.try_fold_with +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct TryFoldWith { + base: I, + item: U::Ok, + fold_op: F, +} + +impl Debug for TryFoldWith +where + U::Ok: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TryFoldWith") + .field("base", &self.base) + .field("item", &self.item) + .finish() + } +} + +impl ParallelIterator for TryFoldWith +where + I: ParallelIterator, + F: Fn(U::Ok, I::Item) -> U + Sync + Send, + U: Try + Send, + U::Ok: Clone + Send, +{ + type Item = U; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = TryFoldWithConsumer { + base: consumer, + item: self.item, + fold_op: &self.fold_op, + }; + self.base.drive_unindexed(consumer1) + } +} + +struct TryFoldWithConsumer<'c, C, U: Try, F> { + base: C, + item: U::Ok, + fold_op: &'c F, +} + +impl<'r, U, T, C, F> Consumer for TryFoldWithConsumer<'r, C, U, F> +where + C: Consumer, + F: Fn(U::Ok, T) -> U + Sync, + U: Try + Send, + U::Ok: Clone + Send, +{ + type Folder = TryFoldFolder<'r, C::Folder, U, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + TryFoldWithConsumer { + base: left, + item: self.item.clone(), + ..self + }, + TryFoldWithConsumer { + base: right, + ..self + }, + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + TryFoldFolder { + base: self.base.into_folder(), + result: Ok(self.item), + fold_op: self.fold_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'r, U, T, C, F> UnindexedConsumer for TryFoldWithConsumer<'r, C, U, F> +where + C: UnindexedConsumer, 
+ F: Fn(U::Ok, T) -> U + Sync, + U: Try + Send, + U::Ok: Clone + Send, +{ + fn split_off_left(&self) -> Self { + TryFoldWithConsumer { + base: self.base.split_off_left(), + item: self.item.clone(), + ..*self + } + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} diff --git a/src/iter/try_reduce.rs b/src/iter/try_reduce.rs new file mode 100644 index 0000000..76b3850 --- /dev/null +++ b/src/iter/try_reduce.rs @@ -0,0 +1,129 @@ +use super::plumbing::*; +use super::ParallelIterator; + +use super::private::Try; +use std::sync::atomic::{AtomicBool, Ordering}; + +pub(super) fn try_reduce(pi: PI, identity: ID, reduce_op: R) -> T +where + PI: ParallelIterator, + R: Fn(T::Ok, T::Ok) -> T + Sync, + ID: Fn() -> T::Ok + Sync, + T: Try + Send, +{ + let full = AtomicBool::new(false); + let consumer = TryReduceConsumer { + identity: &identity, + reduce_op: &reduce_op, + full: &full, + }; + pi.drive_unindexed(consumer) +} + +struct TryReduceConsumer<'r, R, ID> { + identity: &'r ID, + reduce_op: &'r R, + full: &'r AtomicBool, +} + +impl<'r, R, ID> Copy for TryReduceConsumer<'r, R, ID> {} + +impl<'r, R, ID> Clone for TryReduceConsumer<'r, R, ID> { + fn clone(&self) -> Self { + *self + } +} + +impl<'r, R, ID, T> Consumer for TryReduceConsumer<'r, R, ID> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + ID: Fn() -> T::Ok + Sync, + T: Try + Send, +{ + type Folder = TryReduceFolder<'r, R, T>; + type Reducer = Self; + type Result = T; + + fn split_at(self, _index: usize) -> (Self, Self, Self) { + (self, self, self) + } + + fn into_folder(self) -> Self::Folder { + TryReduceFolder { + reduce_op: self.reduce_op, + result: Ok((self.identity)()), + full: self.full, + } + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) + } +} + +impl<'r, R, ID, T> UnindexedConsumer for TryReduceConsumer<'r, R, ID> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + ID: Fn() -> T::Ok + Sync, + T: Try + Send, +{ + fn split_off_left(&self) -> Self { + *self + } + + fn 
to_reducer(&self) -> Self::Reducer { + *self + } +} + +impl<'r, R, ID, T> Reducer for TryReduceConsumer<'r, R, ID> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + T: Try, +{ + fn reduce(self, left: T, right: T) -> T { + match (left.into_result(), right.into_result()) { + (Ok(left), Ok(right)) => (self.reduce_op)(left, right), + (Err(e), _) | (_, Err(e)) => T::from_error(e), + } + } +} + +struct TryReduceFolder<'r, R, T: Try> { + reduce_op: &'r R, + result: Result, + full: &'r AtomicBool, +} + +impl<'r, R, T> Folder for TryReduceFolder<'r, R, T> +where + R: Fn(T::Ok, T::Ok) -> T, + T: Try, +{ + type Result = T; + + fn consume(mut self, item: T) -> Self { + let reduce_op = self.reduce_op; + if let Ok(left) = self.result { + self.result = match item.into_result() { + Ok(right) => reduce_op(left, right).into_result(), + Err(error) => Err(error), + }; + } + if self.result.is_err() { + self.full.store(true, Ordering::Relaxed) + } + self + } + + fn complete(self) -> T { + match self.result { + Ok(ok) => T::from_ok(ok), + Err(error) => T::from_error(error), + } + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) + } +} diff --git a/src/iter/try_reduce_with.rs b/src/iter/try_reduce_with.rs new file mode 100644 index 0000000..6be3100 --- /dev/null +++ b/src/iter/try_reduce_with.rs @@ -0,0 +1,134 @@ +use super::plumbing::*; +use super::ParallelIterator; + +use super::private::Try; +use std::sync::atomic::{AtomicBool, Ordering}; + +pub(super) fn try_reduce_with(pi: PI, reduce_op: R) -> Option +where + PI: ParallelIterator, + R: Fn(T::Ok, T::Ok) -> T + Sync, + T: Try + Send, +{ + let full = AtomicBool::new(false); + let consumer = TryReduceWithConsumer { + reduce_op: &reduce_op, + full: &full, + }; + pi.drive_unindexed(consumer) +} + +struct TryReduceWithConsumer<'r, R> { + reduce_op: &'r R, + full: &'r AtomicBool, +} + +impl<'r, R> Copy for TryReduceWithConsumer<'r, R> {} + +impl<'r, R> Clone for TryReduceWithConsumer<'r, R> { + fn clone(&self) -> Self { + *self + 
} +} + +impl<'r, R, T> Consumer for TryReduceWithConsumer<'r, R> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + T: Try + Send, +{ + type Folder = TryReduceWithFolder<'r, R, T>; + type Reducer = Self; + type Result = Option; + + fn split_at(self, _index: usize) -> (Self, Self, Self) { + (self, self, self) + } + + fn into_folder(self) -> Self::Folder { + TryReduceWithFolder { + reduce_op: self.reduce_op, + opt_result: None, + full: self.full, + } + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) + } +} + +impl<'r, R, T> UnindexedConsumer for TryReduceWithConsumer<'r, R> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + T: Try + Send, +{ + fn split_off_left(&self) -> Self { + *self + } + + fn to_reducer(&self) -> Self::Reducer { + *self + } +} + +impl<'r, R, T> Reducer> for TryReduceWithConsumer<'r, R> +where + R: Fn(T::Ok, T::Ok) -> T + Sync, + T: Try, +{ + fn reduce(self, left: Option, right: Option) -> Option { + let reduce_op = self.reduce_op; + match (left, right) { + (None, x) | (x, None) => x, + (Some(a), Some(b)) => match (a.into_result(), b.into_result()) { + (Ok(a), Ok(b)) => Some(reduce_op(a, b)), + (Err(e), _) | (_, Err(e)) => Some(T::from_error(e)), + }, + } + } +} + +struct TryReduceWithFolder<'r, R, T: Try> { + reduce_op: &'r R, + opt_result: Option>, + full: &'r AtomicBool, +} + +impl<'r, R, T> Folder for TryReduceWithFolder<'r, R, T> +where + R: Fn(T::Ok, T::Ok) -> T, + T: Try, +{ + type Result = Option; + + fn consume(self, item: T) -> Self { + let reduce_op = self.reduce_op; + let result = match self.opt_result { + None => item.into_result(), + Some(Ok(a)) => match item.into_result() { + Ok(b) => reduce_op(a, b).into_result(), + Err(e) => Err(e), + }, + Some(Err(e)) => Err(e), + }; + if result.is_err() { + self.full.store(true, Ordering::Relaxed) + } + TryReduceWithFolder { + opt_result: Some(result), + ..self + } + } + + fn complete(self) -> Option { + let result = self.opt_result?; + Some(match result { + Ok(ok) => T::from_ok(ok), + 
Err(error) => T::from_error(error), + }) + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) + } +} diff --git a/src/iter/unzip.rs b/src/iter/unzip.rs new file mode 100644 index 0000000..219b909 --- /dev/null +++ b/src/iter/unzip.rs @@ -0,0 +1,464 @@ +use super::plumbing::*; +use super::*; + +/// This trait abstracts the different ways we can "unzip" one parallel +/// iterator into two distinct consumers, which we can handle almost +/// identically apart from how to process the individual items. +trait UnzipOp: Sync + Send { + /// The type of item expected by the left consumer. + type Left: Send; + + /// The type of item expected by the right consumer. + type Right: Send; + + /// Consumes one item and feeds it to one or both of the underlying folders. + fn consume(&self, item: T, left: FA, right: FB) -> (FA, FB) + where + FA: Folder, + FB: Folder; + + /// Reports whether this op may support indexed consumers. + /// - e.g. true for `unzip` where the item count is passed through directly. + /// - e.g. false for `partition` where the sorting is not yet known. + fn indexable() -> bool { + false + } +} + +/// Runs an unzip-like operation into default `ParallelExtend` collections. +fn execute(pi: I, op: OP) -> (FromA, FromB) +where + I: ParallelIterator, + OP: UnzipOp, + FromA: Default + Send + ParallelExtend, + FromB: Default + Send + ParallelExtend, +{ + let mut a = FromA::default(); + let mut b = FromB::default(); + execute_into(&mut a, &mut b, pi, op); + (a, b) +} + +/// Runs an unzip-like operation into `ParallelExtend` collections. +fn execute_into(a: &mut FromA, b: &mut FromB, pi: I, op: OP) +where + I: ParallelIterator, + OP: UnzipOp, + FromA: Send + ParallelExtend, + FromB: Send + ParallelExtend, +{ + // We have no idea what the consumers will look like for these + // collections' `par_extend`, but we can intercept them in our own + // `drive_unindexed`. 
Start with the left side, type `A`: + let iter = UnzipA { base: pi, op, b }; + a.par_extend(iter); +} + +/// Unzips the items of a parallel iterator into a pair of arbitrary +/// `ParallelExtend` containers. +/// +/// This is called by `ParallelIterator::unzip`. +pub(super) fn unzip(pi: I) -> (FromA, FromB) +where + I: ParallelIterator, + FromA: Default + Send + ParallelExtend, + FromB: Default + Send + ParallelExtend, + A: Send, + B: Send, +{ + execute(pi, Unzip) +} + +/// Unzips an `IndexedParallelIterator` into two arbitrary `Consumer`s. +/// +/// This is called by `super::collect::unzip_into_vecs`. +pub(super) fn unzip_indexed(pi: I, left: CA, right: CB) -> (CA::Result, CB::Result) +where + I: IndexedParallelIterator, + CA: Consumer, + CB: Consumer, + A: Send, + B: Send, +{ + let consumer = UnzipConsumer { + op: &Unzip, + left, + right, + }; + pi.drive(consumer) +} + +/// An `UnzipOp` that splits a tuple directly into the two consumers. +struct Unzip; + +impl UnzipOp<(A, B)> for Unzip { + type Left = A; + type Right = B; + + fn consume(&self, item: (A, B), left: FA, right: FB) -> (FA, FB) + where + FA: Folder, + FB: Folder, + { + (left.consume(item.0), right.consume(item.1)) + } + + fn indexable() -> bool { + true + } +} + +/// Partitions the items of a parallel iterator into a pair of arbitrary +/// `ParallelExtend` containers. +/// +/// This is called by `ParallelIterator::partition`. +pub(super) fn partition(pi: I, predicate: P) -> (A, B) +where + I: ParallelIterator, + A: Default + Send + ParallelExtend, + B: Default + Send + ParallelExtend, + P: Fn(&I::Item) -> bool + Sync + Send, +{ + execute(pi, Partition { predicate }) +} + +/// An `UnzipOp` that routes items depending on a predicate function. +struct Partition

{ + predicate: P, +} + +impl UnzipOp for Partition

+where + P: Fn(&T) -> bool + Sync + Send, + T: Send, +{ + type Left = T; + type Right = T; + + fn consume(&self, item: T, left: FA, right: FB) -> (FA, FB) + where + FA: Folder, + FB: Folder, + { + if (self.predicate)(&item) { + (left.consume(item), right) + } else { + (left, right.consume(item)) + } + } +} + +/// Partitions and maps the items of a parallel iterator into a pair of +/// arbitrary `ParallelExtend` containers. +/// +/// This is called by `ParallelIterator::partition_map`. +pub(super) fn partition_map(pi: I, predicate: P) -> (A, B) +where + I: ParallelIterator, + A: Default + Send + ParallelExtend, + B: Default + Send + ParallelExtend, + P: Fn(I::Item) -> Either + Sync + Send, + L: Send, + R: Send, +{ + execute(pi, PartitionMap { predicate }) +} + +/// An `UnzipOp` that routes items depending on how they are mapped `Either`. +struct PartitionMap

{ + predicate: P, +} + +impl UnzipOp for PartitionMap

+where + P: Fn(T) -> Either + Sync + Send, + L: Send, + R: Send, +{ + type Left = L; + type Right = R; + + fn consume(&self, item: T, left: FA, right: FB) -> (FA, FB) + where + FA: Folder, + FB: Folder, + { + match (self.predicate)(item) { + Either::Left(item) => (left.consume(item), right), + Either::Right(item) => (left, right.consume(item)), + } + } +} + +/// A fake iterator to intercept the `Consumer` for type `A`. +struct UnzipA<'b, I, OP, FromB> { + base: I, + op: OP, + b: &'b mut FromB, +} + +impl<'b, I, OP, FromB> ParallelIterator for UnzipA<'b, I, OP, FromB> +where + I: ParallelIterator, + OP: UnzipOp, + FromB: Send + ParallelExtend, +{ + type Item = OP::Left; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let mut result = None; + { + // Now it's time to find the consumer for type `B` + let iter = UnzipB { + base: self.base, + op: self.op, + left_consumer: consumer, + left_result: &mut result, + }; + self.b.par_extend(iter); + } + // NB: If for some reason `b.par_extend` doesn't actually drive the + // iterator, then we won't have a result for the left side to return + // at all. We can't fake an arbitrary consumer's result, so panic. + result.expect("unzip consumers didn't execute!") + } + + fn opt_len(&self) -> Option { + if OP::indexable() { + self.base.opt_len() + } else { + None + } + } +} + +/// A fake iterator to intercept the `Consumer` for type `B`. +struct UnzipB<'r, I, OP, CA> +where + I: ParallelIterator, + OP: UnzipOp, + CA: UnindexedConsumer, + CA::Result: 'r, +{ + base: I, + op: OP, + left_consumer: CA, + left_result: &'r mut Option, +} + +impl<'r, I, OP, CA> ParallelIterator for UnzipB<'r, I, OP, CA> +where + I: ParallelIterator, + OP: UnzipOp, + CA: UnindexedConsumer, +{ + type Item = OP::Right; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + // Now that we have two consumers, we can unzip the real iterator. 
+ let consumer = UnzipConsumer { + op: &self.op, + left: self.left_consumer, + right: consumer, + }; + + let result = self.base.drive_unindexed(consumer); + *self.left_result = Some(result.0); + result.1 + } + + fn opt_len(&self) -> Option { + if OP::indexable() { + self.base.opt_len() + } else { + None + } + } +} + +/// `Consumer` that unzips into two other `Consumer`s +struct UnzipConsumer<'a, OP, CA, CB> { + op: &'a OP, + left: CA, + right: CB, +} + +impl<'a, T, OP, CA, CB> Consumer for UnzipConsumer<'a, OP, CA, CB> +where + OP: UnzipOp, + CA: Consumer, + CB: Consumer, +{ + type Folder = UnzipFolder<'a, OP, CA::Folder, CB::Folder>; + type Reducer = UnzipReducer; + type Result = (CA::Result, CB::Result); + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left1, left2, left_reducer) = self.left.split_at(index); + let (right1, right2, right_reducer) = self.right.split_at(index); + + ( + UnzipConsumer { + op: self.op, + left: left1, + right: right1, + }, + UnzipConsumer { + op: self.op, + left: left2, + right: right2, + }, + UnzipReducer { + left: left_reducer, + right: right_reducer, + }, + ) + } + + fn into_folder(self) -> Self::Folder { + UnzipFolder { + op: self.op, + left: self.left.into_folder(), + right: self.right.into_folder(), + } + } + + fn full(&self) -> bool { + // don't stop until everyone is full + self.left.full() && self.right.full() + } +} + +impl<'a, T, OP, CA, CB> UnindexedConsumer for UnzipConsumer<'a, OP, CA, CB> +where + OP: UnzipOp, + CA: UnindexedConsumer, + CB: UnindexedConsumer, +{ + fn split_off_left(&self) -> Self { + UnzipConsumer { + op: self.op, + left: self.left.split_off_left(), + right: self.right.split_off_left(), + } + } + + fn to_reducer(&self) -> Self::Reducer { + UnzipReducer { + left: self.left.to_reducer(), + right: self.right.to_reducer(), + } + } +} + +/// `Folder` that unzips into two other `Folder`s +struct UnzipFolder<'a, OP, FA, FB> { + op: &'a OP, + left: FA, + right: FB, +} + +impl<'a, T, OP, 
FA, FB> Folder for UnzipFolder<'a, OP, FA, FB> +where + OP: UnzipOp, + FA: Folder, + FB: Folder, +{ + type Result = (FA::Result, FB::Result); + + fn consume(self, item: T) -> Self { + let (left, right) = self.op.consume(item, self.left, self.right); + UnzipFolder { + op: self.op, + left, + right, + } + } + + fn complete(self) -> Self::Result { + (self.left.complete(), self.right.complete()) + } + + fn full(&self) -> bool { + // don't stop until everyone is full + self.left.full() && self.right.full() + } +} + +/// `Reducer` that unzips into two other `Reducer`s +struct UnzipReducer { + left: RA, + right: RB, +} + +impl Reducer<(A, B)> for UnzipReducer +where + RA: Reducer, + RB: Reducer, +{ + fn reduce(self, left: (A, B), right: (A, B)) -> (A, B) { + ( + self.left.reduce(left.0, right.0), + self.right.reduce(left.1, right.1), + ) + } +} + +impl ParallelExtend<(A, B)> for (FromA, FromB) +where + A: Send, + B: Send, + FromA: Send + ParallelExtend, + FromB: Send + ParallelExtend, +{ + fn par_extend(&mut self, pi: I) + where + I: IntoParallelIterator, + { + execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), Unzip); + } +} + +impl ParallelExtend> for (A, B) +where + L: Send, + R: Send, + A: Send + ParallelExtend, + B: Send + ParallelExtend, +{ + fn par_extend(&mut self, pi: I) + where + I: IntoParallelIterator>, + { + execute_into(&mut self.0, &mut self.1, pi.into_par_iter(), UnEither); + } +} + +/// An `UnzipOp` that routes items depending on their `Either` variant. 
+struct UnEither; + +impl UnzipOp> for UnEither +where + L: Send, + R: Send, +{ + type Left = L; + type Right = R; + + fn consume(&self, item: Either, left: FL, right: FR) -> (FL, FR) + where + FL: Folder, + FR: Folder, + { + match item { + Either::Left(item) => (left.consume(item), right), + Either::Right(item) => (left, right.consume(item)), + } + } +} diff --git a/src/iter/update.rs b/src/iter/update.rs new file mode 100644 index 0000000..373a4d7 --- /dev/null +++ b/src/iter/update.rs @@ -0,0 +1,327 @@ +use super::plumbing::*; +use super::*; + +use std::fmt::{self, Debug}; + +/// `Update` is an iterator that mutates the elements of an +/// underlying iterator before they are yielded. +/// +/// This struct is created by the [`update()`] method on [`ParallelIterator`] +/// +/// [`update()`]: trait.ParallelIterator.html#method.update +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Clone)] +pub struct Update { + base: I, + update_op: F, +} + +impl Debug for Update { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Update").field("base", &self.base).finish() + } +} + +impl Update +where + I: ParallelIterator, +{ + /// Creates a new `Update` iterator. 
+ pub(super) fn new(base: I, update_op: F) -> Self { + Update { base, update_op } + } +} + +impl ParallelIterator for Update +where + I: ParallelIterator, + F: Fn(&mut I::Item) + Send + Sync, +{ + type Item = I::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = UpdateConsumer::new(consumer, &self.update_op); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl IndexedParallelIterator for Update +where + I: IndexedParallelIterator, + F: Fn(&mut I::Item) + Send + Sync, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = UpdateConsumer::new(consumer, &self.update_op); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { + callback, + update_op: self.update_op, + }); + + struct Callback { + callback: CB, + update_op: F, + } + + impl ProducerCallback for Callback + where + CB: ProducerCallback, + F: Fn(&mut T) + Send + Sync, + { + type Output = CB::Output; + + fn callback

(self, base: P) -> CB::Output + where + P: Producer, + { + let producer = UpdateProducer { + base, + update_op: &self.update_op, + }; + self.callback.callback(producer) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct UpdateProducer<'f, P, F> { + base: P, + update_op: &'f F, +} + +impl<'f, P, F> Producer for UpdateProducer<'f, P, F> +where + P: Producer, + F: Fn(&mut P::Item) + Send + Sync, +{ + type Item = P::Item; + type IntoIter = UpdateSeq; + + fn into_iter(self) -> Self::IntoIter { + UpdateSeq { + base: self.base.into_iter(), + update_op: self.update_op, + } + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + UpdateProducer { + base: left, + update_op: self.update_op, + }, + UpdateProducer { + base: right, + update_op: self.update_op, + }, + ) + } + + fn fold_with(self, folder: G) -> G + where + G: Folder, + { + let folder1 = UpdateFolder { + base: folder, + update_op: self.update_op, + }; + self.base.fold_with(folder1).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct UpdateConsumer<'f, C, F> { + base: C, + update_op: &'f F, +} + +impl<'f, C, F> UpdateConsumer<'f, C, F> { + fn new(base: C, update_op: &'f F) -> Self { + UpdateConsumer { base, update_op } + } +} + +impl<'f, T, C, F> Consumer for UpdateConsumer<'f, C, F> +where + C: Consumer, + F: Fn(&mut T) + Send + Sync, +{ + type Folder = UpdateFolder<'f, C::Folder, F>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + UpdateConsumer::new(left, self.update_op), + UpdateConsumer::new(right, self.update_op), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder 
{ + UpdateFolder { + base: self.base.into_folder(), + update_op: self.update_op, + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'f, T, C, F> UnindexedConsumer for UpdateConsumer<'f, C, F> +where + C: UnindexedConsumer, + F: Fn(&mut T) + Send + Sync, +{ + fn split_off_left(&self) -> Self { + UpdateConsumer::new(self.base.split_off_left(), &self.update_op) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct UpdateFolder<'f, C, F> { + base: C, + update_op: &'f F, +} + +fn apply(update_op: impl Fn(&mut T)) -> impl Fn(T) -> T { + move |mut item| { + update_op(&mut item); + item + } +} + +impl<'f, T, C, F> Folder for UpdateFolder<'f, C, F> +where + C: Folder, + F: Fn(&mut T), +{ + type Result = C::Result; + + fn consume(self, mut item: T) -> Self { + (self.update_op)(&mut item); + + UpdateFolder { + base: self.base.consume(item), + update_op: self.update_op, + } + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator, + { + let update_op = self.update_op; + self.base = self + .base + .consume_iter(iter.into_iter().map(apply(update_op))); + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} + +/// Standard Update adaptor, based on `itertools::adaptors::Update` +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +struct UpdateSeq { + base: I, + update_op: F, +} + +impl Iterator for UpdateSeq +where + I: Iterator, + F: Fn(&mut I::Item), +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + let mut v = self.base.next()?; + (self.update_op)(&mut v); + Some(v) + } + + fn size_hint(&self) -> (usize, Option) { + self.base.size_hint() + } + + fn fold(self, init: Acc, g: G) -> Acc + where + G: FnMut(Acc, Self::Item) -> Acc, + { + self.base.map(apply(self.update_op)).fold(init, g) + } + + // if possible, re-use inner iterator specializations in collect + fn 
collect(self) -> C + where + C: ::std::iter::FromIterator, + { + self.base.map(apply(self.update_op)).collect() + } +} + +impl ExactSizeIterator for UpdateSeq +where + I: ExactSizeIterator, + F: Fn(&mut I::Item), +{ +} + +impl DoubleEndedIterator for UpdateSeq +where + I: DoubleEndedIterator, + F: Fn(&mut I::Item), +{ + fn next_back(&mut self) -> Option { + let mut v = self.base.next_back()?; + (self.update_op)(&mut v); + Some(v) + } +} diff --git a/src/iter/while_some.rs b/src/iter/while_some.rs new file mode 100644 index 0000000..215047b --- /dev/null +++ b/src/iter/while_some.rs @@ -0,0 +1,154 @@ +use super::plumbing::*; +use super::*; +use std::sync::atomic::{AtomicBool, Ordering}; + +/// `WhileSome` is an iterator that yields the `Some` elements of an iterator, +/// halting as soon as any `None` is produced. +/// +/// This struct is created by the [`while_some()`] method on [`ParallelIterator`] +/// +/// [`while_some()`]: trait.ParallelIterator.html#method.while_some +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct WhileSome { + base: I, +} + +impl WhileSome +where + I: ParallelIterator, +{ + /// Creates a new `WhileSome` iterator. 
+ pub(super) fn new(base: I) -> Self { + WhileSome { base } + } +} + +impl ParallelIterator for WhileSome +where + I: ParallelIterator>, + T: Send, +{ + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let full = AtomicBool::new(false); + let consumer1 = WhileSomeConsumer { + base: consumer, + full: &full, + }; + self.base.drive_unindexed(consumer1) + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct WhileSomeConsumer<'f, C> { + base: C, + full: &'f AtomicBool, +} + +impl<'f, T, C> Consumer> for WhileSomeConsumer<'f, C> +where + C: Consumer, + T: Send, +{ + type Folder = WhileSomeFolder<'f, C::Folder>; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + WhileSomeConsumer { base: left, ..self }, + WhileSomeConsumer { + base: right, + ..self + }, + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + WhileSomeFolder { + base: self.base.into_folder(), + full: self.full, + } + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) || self.base.full() + } +} + +impl<'f, T, C> UnindexedConsumer> for WhileSomeConsumer<'f, C> +where + C: UnindexedConsumer, + T: Send, +{ + fn split_off_left(&self) -> Self { + WhileSomeConsumer { + base: self.base.split_off_left(), + ..*self + } + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct WhileSomeFolder<'f, C> { + base: C, + full: &'f AtomicBool, +} + +impl<'f, T, C> Folder> for WhileSomeFolder<'f, C> +where + C: Folder, +{ + type Result = C::Result; + + fn consume(mut self, item: Option) -> Self { + match item { + Some(item) => self.base = self.base.consume(item), + None => self.full.store(true, Ordering::Relaxed), + } + self + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: IntoIterator>, + { + 
fn some(full: &AtomicBool) -> impl Fn(&Option) -> bool + '_ { + move |x| match *x { + Some(_) => !full.load(Ordering::Relaxed), + None => { + full.store(true, Ordering::Relaxed); + false + } + } + } + + self.base = self.base.consume_iter( + iter.into_iter() + .take_while(some(self.full)) + .map(Option::unwrap), + ); + self + } + + fn complete(self) -> C::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.full.load(Ordering::Relaxed) || self.base.full() + } +} diff --git a/src/iter/zip.rs b/src/iter/zip.rs new file mode 100644 index 0000000..33823db --- /dev/null +++ b/src/iter/zip.rs @@ -0,0 +1,159 @@ +use super::plumbing::*; +use super::*; +use std::cmp; +use std::iter; + +/// `Zip` is an iterator that zips up `a` and `b` into a single iterator +/// of pairs. This struct is created by the [`zip()`] method on +/// [`IndexedParallelIterator`] +/// +/// [`zip()`]: trait.IndexedParallelIterator.html#method.zip +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Zip { + a: A, + b: B, +} + +impl Zip +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + /// Creates a new `Zip` iterator. 
+ pub(super) fn new(a: A, b: B) -> Self { + Zip { a, b } + } +} + +impl ParallelIterator for Zip +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + type Item = (A::Item, B::Item); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Zip +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + cmp::min(self.a.len(), self.b.len()) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.a.with_producer(CallbackA { + callback, + b: self.b, + }); + + struct CallbackA { + callback: CB, + b: B, + } + + impl ProducerCallback for CallbackA + where + B: IndexedParallelIterator, + CB: ProducerCallback<(ITEM, B::Item)>, + { + type Output = CB::Output; + + fn callback(self, a_producer: A) -> Self::Output + where + A: Producer, + { + self.b.with_producer(CallbackB { + a_producer, + callback: self.callback, + }) + } + } + + struct CallbackB { + a_producer: A, + callback: CB, + } + + impl ProducerCallback for CallbackB + where + A: Producer, + CB: ProducerCallback<(A::Item, ITEM)>, + { + type Output = CB::Output; + + fn callback(self, b_producer: B) -> Self::Output + where + B: Producer, + { + self.callback.callback(ZipProducer { + a: self.a_producer, + b: b_producer, + }) + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +struct ZipProducer { + a: A, + b: B, +} + +impl Producer for ZipProducer { + type Item = (A::Item, B::Item); + type IntoIter = iter::Zip; + + fn into_iter(self) -> Self::IntoIter { + self.a.into_iter().zip(self.b.into_iter()) + } + + fn min_len(&self) -> usize { + cmp::max(self.a.min_len(), self.b.min_len()) + } + + fn max_len(&self) -> usize 
{ + cmp::min(self.a.max_len(), self.b.max_len()) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (a_left, a_right) = self.a.split_at(index); + let (b_left, b_right) = self.b.split_at(index); + ( + ZipProducer { + a: a_left, + b: b_left, + }, + ZipProducer { + a: a_right, + b: b_right, + }, + ) + } +} diff --git a/src/iter/zip_eq.rs b/src/iter/zip_eq.rs new file mode 100644 index 0000000..4e64397 --- /dev/null +++ b/src/iter/zip_eq.rs @@ -0,0 +1,72 @@ +use super::plumbing::*; +use super::*; + +/// An [`IndexedParallelIterator`] that iterates over two parallel iterators of equal +/// length simultaneously. +/// +/// This struct is created by the [`zip_eq`] method on [`IndexedParallelIterator`], +/// see its documentation for more information. +/// +/// [`zip_eq`]: trait.IndexedParallelIterator.html#method.zip_eq +/// [`IndexedParallelIterator`]: trait.IndexedParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct ZipEq { + zip: Zip, +} + +impl ZipEq +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + /// Creates a new `ZipEq` iterator. 
+ pub(super) fn new(a: A, b: B) -> Self { + ZipEq { + zip: super::Zip::new(a, b), + } + } +} + +impl ParallelIterator for ZipEq +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + type Item = (A::Item, B::Item); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self.zip, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.zip.len()) + } +} + +impl IndexedParallelIterator for ZipEq +where + A: IndexedParallelIterator, + B: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self.zip, consumer) + } + + fn len(&self) -> usize { + self.zip.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + self.zip.with_producer(callback) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..d5d0314 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,120 @@ +#![doc(html_root_url = "https://docs.rs/rayon/1.5")] +#![deny(missing_debug_implementations)] +#![deny(missing_docs)] +#![deny(unreachable_pub)] +#![warn(rust_2018_idioms)] + +//! Data-parallelism library that makes it easy to convert sequential +//! computations into parallel +//! +//! Rayon is lightweight and convenient for introducing parallelism into existing +//! code. It guarantees data-race free executions and takes advantage of +//! parallelism when sensible, based on work-load at runtime. +//! +//! # How to use Rayon +//! +//! There are two ways to use Rayon: +//! +//! - **High-level parallel constructs** are the simplest way to use Rayon and also +//! typically the most efficient. +//! - [Parallel iterators][iter module] make it easy to convert a sequential iterator to +//! execute in parallel. +//! - The [`ParallelIterator`] trait defines general methods for all parallel iterators. +//! - The [`IndexedParallelIterator`] trait adds methods for iterators that support random +//! access. +//! 
- The [`par_sort`] method sorts `&mut [T]` slices (or vectors) in parallel. +//! - [`par_extend`] can be used to efficiently grow collections with items produced +//! by a parallel iterator. +//! - **Custom tasks** let you divide your work into parallel tasks yourself. +//! - [`join`] is used to subdivide a task into two pieces. +//! - [`scope`] creates a scope within which you can create any number of parallel tasks. +//! - [`ThreadPoolBuilder`] can be used to create your own thread pools or customize +//! the global one. +//! +//! [iter module]: iter/index.html +//! [`join`]: fn.join.html +//! [`scope`]: fn.scope.html +//! [`par_sort`]: slice/trait.ParallelSliceMut.html#method.par_sort +//! [`par_extend`]: iter/trait.ParallelExtend.html#tymethod.par_extend +//! [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html +//! +//! # Basic usage and the Rayon prelude +//! +//! First, you will need to add `rayon` to your `Cargo.toml`. +//! +//! Next, to use parallel iterators or the other high-level methods, +//! you need to import several traits. Those traits are bundled into +//! the module [`rayon::prelude`]. It is recommended that you import +//! all of these traits at once by adding `use rayon::prelude::*` at +//! the top of each module that uses Rayon methods. +//! +//! These traits give you access to the `par_iter` method which provides +//! parallel implementations of many iterative functions such as [`map`], +//! [`for_each`], [`filter`], [`fold`], and [more]. +//! +//! [`rayon::prelude`]: prelude/index.html +//! [`map`]: iter/trait.ParallelIterator.html#method.map +//! [`for_each`]: iter/trait.ParallelIterator.html#method.for_each +//! [`filter`]: iter/trait.ParallelIterator.html#method.filter +//! [`fold`]: iter/trait.ParallelIterator.html#method.fold +//! [more]: iter/trait.ParallelIterator.html#provided-methods +//! [`ParallelIterator`]: iter/trait.ParallelIterator.html +//! [`IndexedParallelIterator`]: iter/trait.IndexedParallelIterator.html +//! +//! 
# Crate Layout +//! +//! Rayon extends many of the types found in the standard library with +//! parallel iterator implementations. The modules in the `rayon` +//! crate mirror [`std`] itself: so, e.g., the `option` module in +//! Rayon contains parallel iterators for the `Option` type, which is +//! found in [the `option` module of `std`]. Similarly, the +//! `collections` module in Rayon offers parallel iterator types for +//! [the `collections` from `std`]. You will rarely need to access +//! these submodules unless you need to name iterator types +//! explicitly. +//! +//! [the `option` module of `std`]: https://doc.rust-lang.org/std/option/index.html +//! [the `collections` from `std`]: https://doc.rust-lang.org/std/collections/index.html +//! [`std`]: https://doc.rust-lang.org/std/ +//! +//! # Other questions? +//! +//! See [the Rayon FAQ][faq]. +//! +//! [faq]: https://github.com/rayon-rs/rayon/blob/master/FAQ.md + +#[macro_use] +mod delegate; + +#[macro_use] +mod private; + +mod split_producer; + +pub mod collections; +pub mod iter; +pub mod option; +pub mod prelude; +pub mod range; +pub mod range_inclusive; +pub mod result; +pub mod slice; +pub mod str; +pub mod string; +pub mod vec; + +mod math; +mod par_either; + +mod compile_fail; + +pub use rayon_core::FnContext; +pub use rayon_core::ThreadBuilder; +pub use rayon_core::ThreadPool; +pub use rayon_core::ThreadPoolBuildError; +pub use rayon_core::ThreadPoolBuilder; +pub use rayon_core::{current_num_threads, current_thread_index}; +pub use rayon_core::{join, join_context}; +pub use rayon_core::{scope, Scope}; +pub use rayon_core::{scope_fifo, ScopeFifo}; +pub use rayon_core::{spawn, spawn_fifo}; diff --git a/src/math.rs b/src/math.rs new file mode 100644 index 0000000..9de5889 --- /dev/null +++ b/src/math.rs @@ -0,0 +1,54 @@ +use std::ops::{Bound, Range, RangeBounds}; + +/// Divide `n` by `divisor`, and round up to the nearest integer +/// if not evenly divisible. 
+#[inline] +pub(super) fn div_round_up(n: usize, divisor: usize) -> usize { + debug_assert!(divisor != 0, "Division by zero!"); + if n == 0 { + 0 + } else { + (n - 1) / divisor + 1 + } +} + +/// Normalize arbitrary `RangeBounds` to a `Range` +pub(super) fn simplify_range(range: impl RangeBounds, len: usize) -> Range { + let start = match range.start_bound() { + Bound::Unbounded => 0, + Bound::Included(&i) if i <= len => i, + Bound::Excluded(&i) if i < len => i + 1, + bound => panic!("range start {:?} should be <= length {}", bound, len), + }; + let end = match range.end_bound() { + Bound::Unbounded => len, + Bound::Excluded(&i) if i <= len => i, + Bound::Included(&i) if i < len => i + 1, + bound => panic!("range end {:?} should be <= length {}", bound, len), + }; + if start > end { + panic!( + "range start {:?} should be <= range end {:?}", + range.start_bound(), + range.end_bound() + ); + } + start..end +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn check_div_round_up() { + assert_eq!(0, div_round_up(0, 5)); + assert_eq!(1, div_round_up(5, 5)); + assert_eq!(1, div_round_up(1, 5)); + assert_eq!(2, div_round_up(3, 2)); + assert_eq!( + usize::max_value() / 2 + 1, + div_round_up(usize::max_value(), 2) + ); + } +} diff --git a/src/option.rs b/src/option.rs new file mode 100644 index 0000000..0f56896 --- /dev/null +++ b/src/option.rs @@ -0,0 +1,203 @@ +//! Parallel iterator types for [options][std::option] +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! [std::option]: https://doc.rust-lang.org/stable/std/option/ + +use crate::iter::plumbing::*; +use crate::iter::*; +use std::sync::atomic::{AtomicBool, Ordering}; + +/// A parallel iterator over the value in [`Some`] variant of an [`Option`]. +/// +/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none. +/// +/// This `struct` is created by the [`into_par_iter`] function. 
+/// +/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html +/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some +/// [`into_par_iter`]: ../iter/trait.IntoParallelIterator.html#tymethod.into_par_iter +#[derive(Debug, Clone)] +pub struct IntoIter { + opt: Option, +} + +impl IntoParallelIterator for Option { + type Item = T; + type Iter = IntoIter; + + fn into_par_iter(self) -> Self::Iter { + IntoIter { opt: self } + } +} + +impl ParallelIterator for IntoIter { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.drive(consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for IntoIter { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let mut folder = consumer.into_folder(); + if let Some(item) = self.opt { + folder = folder.consume(item); + } + folder.complete() + } + + fn len(&self) -> usize { + match self.opt { + Some(_) => 1, + None => 0, + } + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(OptionProducer { opt: self.opt }) + } +} + +/// A parallel iterator over a reference to the [`Some`] variant of an [`Option`]. +/// +/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none. +/// +/// This `struct` is created by the [`par_iter`] function. 
+/// +/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html +/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some +/// [`par_iter`]: ../iter/trait.IntoParallelRefIterator.html#tymethod.par_iter +#[derive(Debug)] +pub struct Iter<'a, T: Sync> { + inner: IntoIter<&'a T>, +} + +impl<'a, T: Sync> Clone for Iter<'a, T> { + fn clone(&self) -> Self { + Iter { + inner: self.inner.clone(), + } + } +} + +impl<'a, T: Sync> IntoParallelIterator for &'a Option { + type Item = &'a T; + type Iter = Iter<'a, T>; + + fn into_par_iter(self) -> Self::Iter { + Iter { + inner: self.as_ref().into_par_iter(), + } + } +} + +delegate_indexed_iterator! { + Iter<'a, T> => &'a T, + impl<'a, T: Sync + 'a> +} + +/// A parallel iterator over a mutable reference to the [`Some`] variant of an [`Option`]. +/// +/// The iterator yields one value if the [`Option`] is a [`Some`], otherwise none. +/// +/// This `struct` is created by the [`par_iter_mut`] function. +/// +/// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html +/// [`Some`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.Some +/// [`par_iter_mut`]: ../iter/trait.IntoParallelRefMutIterator.html#tymethod.par_iter_mut +#[derive(Debug)] +pub struct IterMut<'a, T: Send> { + inner: IntoIter<&'a mut T>, +} + +impl<'a, T: Send> IntoParallelIterator for &'a mut Option { + type Item = &'a mut T; + type Iter = IterMut<'a, T>; + + fn into_par_iter(self) -> Self::Iter { + IterMut { + inner: self.as_mut().into_par_iter(), + } + } +} + +delegate_indexed_iterator! 
{ + IterMut<'a, T> => &'a mut T, + impl<'a, T: Send + 'a> +} + +/// Private producer for an option +struct OptionProducer { + opt: Option, +} + +impl Producer for OptionProducer { + type Item = T; + type IntoIter = std::option::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.opt.into_iter() + } + + fn split_at(self, index: usize) -> (Self, Self) { + debug_assert!(index <= 1); + let none = OptionProducer { opt: None }; + if index == 0 { + (none, self) + } else { + (self, none) + } + } +} + +/// Collect an arbitrary `Option`-wrapped collection. +/// +/// If any item is `None`, then all previous items collected are discarded, +/// and it returns only `None`. +impl FromParallelIterator> for Option +where + C: FromParallelIterator, + T: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator>, + { + fn check(found_none: &AtomicBool) -> impl Fn(&Option) + '_ { + move |item| { + if item.is_none() { + found_none.store(true, Ordering::Relaxed); + } + } + } + + let found_none = AtomicBool::new(false); + let collection = par_iter + .into_par_iter() + .inspect(check(&found_none)) + .while_some() + .collect(); + + if found_none.load(Ordering::Relaxed) { + None + } else { + Some(collection) + } + } +} diff --git a/src/par_either.rs b/src/par_either.rs new file mode 100644 index 0000000..a19ce53 --- /dev/null +++ b/src/par_either.rs @@ -0,0 +1,74 @@ +use crate::iter::plumbing::*; +use crate::iter::Either::{Left, Right}; +use crate::iter::*; + +/// `Either` is a parallel iterator if both `L` and `R` are parallel iterators. 
+impl ParallelIterator for Either +where + L: ParallelIterator, + R: ParallelIterator, +{ + type Item = L::Item; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + match self { + Left(iter) => iter.drive_unindexed(consumer), + Right(iter) => iter.drive_unindexed(consumer), + } + } + + fn opt_len(&self) -> Option { + self.as_ref().either(L::opt_len, R::opt_len) + } +} + +impl IndexedParallelIterator for Either +where + L: IndexedParallelIterator, + R: IndexedParallelIterator, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + match self { + Left(iter) => iter.drive(consumer), + Right(iter) => iter.drive(consumer), + } + } + + fn len(&self) -> usize { + self.as_ref().either(L::len, R::len) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + match self { + Left(iter) => iter.with_producer(callback), + Right(iter) => iter.with_producer(callback), + } + } +} + +/// `Either` can be extended if both `L` and `R` are parallel extendable. +impl ParallelExtend for Either +where + L: ParallelExtend, + R: ParallelExtend, + T: Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + match self.as_mut() { + Left(collection) => collection.par_extend(par_iter), + Right(collection) => collection.par_extend(par_iter), + } + } +} diff --git a/src/prelude.rs b/src/prelude.rs new file mode 100644 index 0000000..6eaca06 --- /dev/null +++ b/src/prelude.rs @@ -0,0 +1,17 @@ +//! The rayon prelude imports the various `ParallelIterator` traits. +//! The intention is that one can include `use rayon::prelude::*` and +//! have easy access to the various traits and methods you will need. 
+ +pub use crate::iter::FromParallelIterator; +pub use crate::iter::IndexedParallelIterator; +pub use crate::iter::IntoParallelIterator; +pub use crate::iter::IntoParallelRefIterator; +pub use crate::iter::IntoParallelRefMutIterator; +pub use crate::iter::ParallelBridge; +pub use crate::iter::ParallelDrainFull; +pub use crate::iter::ParallelDrainRange; +pub use crate::iter::ParallelExtend; +pub use crate::iter::ParallelIterator; +pub use crate::slice::ParallelSlice; +pub use crate::slice::ParallelSliceMut; +pub use crate::str::ParallelString; diff --git a/src/private.rs b/src/private.rs new file mode 100644 index 0000000..c85e77b --- /dev/null +++ b/src/private.rs @@ -0,0 +1,26 @@ +//! The public parts of this private module are used to create traits +//! that cannot be implemented outside of our own crate. This way we +//! can feel free to extend those traits without worrying about it +//! being a breaking change for other implementations. + +/// If this type is pub but not publicly reachable, third parties +/// can't name it and can't implement traits using it. +#[allow(missing_debug_implementations)] +pub struct PrivateMarker; + +macro_rules! private_decl { + () => { + /// This trait is private; this method exists to make it + /// impossible to implement outside the crate. + #[doc(hidden)] + fn __rayon_private__(&self) -> crate::private::PrivateMarker; + }; +} + +macro_rules! private_impl { + () => { + fn __rayon_private__(&self) -> crate::private::PrivateMarker { + crate::private::PrivateMarker + } + }; +} diff --git a/src/range.rs b/src/range.rs new file mode 100644 index 0000000..09ba25e --- /dev/null +++ b/src/range.rs @@ -0,0 +1,368 @@ +//! Parallel iterator types for [ranges][std::range], +//! the type for values created by `a..b` expressions +//! +//! You will rarely need to interact with this module directly unless you have +//! need to name one of the iterator types. +//! +//! ``` +//! use rayon::prelude::*; +//! +//! 
let r = (0..100u64).into_par_iter() +//! .sum(); +//! +//! // compare result with sequential calculation +//! assert_eq!((0..100).sum::(), r); +//! ``` +//! +//! [std::range]: https://doc.rust-lang.org/core/ops/struct.Range.html + +use crate::iter::plumbing::*; +use crate::iter::*; +use std::char; +use std::ops::Range; +use std::usize; + +/// Parallel iterator over a range, implemented for all integer types. +/// +/// **Note:** The `zip` operation requires `IndexedParallelIterator` +/// which is not implemented for `u64`, `i64`, `u128`, or `i128`. +/// +/// ``` +/// use rayon::prelude::*; +/// +/// let p = (0..25usize).into_par_iter() +/// .zip(0..25usize) +/// .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0) +/// .map(|(x, y)| x * y) +/// .sum::(); +/// +/// let s = (0..25usize).zip(0..25) +/// .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0) +/// .map(|(x, y)| x * y) +/// .sum(); +/// +/// assert_eq!(p, s); +/// ``` +#[derive(Debug, Clone)] +pub struct Iter { + range: Range, +} + +impl IntoParallelIterator for Range +where + Iter: ParallelIterator, +{ + type Item = as ParallelIterator>::Item; + type Iter = Iter; + + fn into_par_iter(self) -> Self::Iter { + Iter { range: self } + } +} + +struct IterProducer { + range: Range, +} + +impl IntoIterator for IterProducer +where + Range: Iterator, +{ + type Item = as Iterator>::Item; + type IntoIter = Range; + + fn into_iter(self) -> Self::IntoIter { + self.range + } +} + +macro_rules! 
indexed_range_impl { + ( $t:ty ) => { + impl ParallelIterator for Iter<$t> { + type Item = $t; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } + } + + impl IndexedParallelIterator for Iter<$t> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.range.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(IterProducer { range: self.range }) + } + } + + impl Producer for IterProducer<$t> { + type Item = as Iterator>::Item; + type IntoIter = Range<$t>; + fn into_iter(self) -> Self::IntoIter { + self.range + } + + fn split_at(self, index: usize) -> (Self, Self) { + assert!(index <= self.range.len()); + // For signed $t, the length and requested index could be greater than $t::MAX, and + // then `index as $t` could wrap to negative, so wrapping_add is necessary. + let mid = self.range.start.wrapping_add(index as $t); + let left = self.range.start..mid; + let right = mid..self.range.end; + (IterProducer { range: left }, IterProducer { range: right }) + } + } + }; +} + +trait UnindexedRangeLen { + fn len(&self) -> L; +} + +macro_rules! unindexed_range_impl { + ( $t:ty, $len_t:ty ) => { + impl UnindexedRangeLen<$len_t> for Range<$t> { + fn len(&self) -> $len_t { + let &Range { start, end } = self; + if end > start { + end.wrapping_sub(start) as $len_t + } else { + 0 + } + } + } + + impl ParallelIterator for Iter<$t> { + type Item = $t; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + #[inline] + fn offset(start: $t) -> impl Fn(usize) -> $t { + move |i| start.wrapping_add(i as $t) + } + + if let Some(len) = self.opt_len() { + // Drive this in indexed mode for better `collect`. 
+ (0..len) + .into_par_iter() + .map(offset(self.range.start)) + .drive(consumer) + } else { + bridge_unindexed(IterProducer { range: self.range }, consumer) + } + } + + fn opt_len(&self) -> Option { + let len = self.range.len(); + if len <= usize::MAX as $len_t { + Some(len as usize) + } else { + None + } + } + } + + impl UnindexedProducer for IterProducer<$t> { + type Item = $t; + + fn split(mut self) -> (Self, Option) { + let index = self.range.len() / 2; + if index > 0 { + let mid = self.range.start.wrapping_add(index as $t); + let right = mid..self.range.end; + self.range.end = mid; + (self, Some(IterProducer { range: right })) + } else { + (self, None) + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self) + } + } + }; +} + +// all Range with ExactSizeIterator +indexed_range_impl! {u8} +indexed_range_impl! {u16} +indexed_range_impl! {u32} +indexed_range_impl! {usize} +indexed_range_impl! {i8} +indexed_range_impl! {i16} +indexed_range_impl! {i32} +indexed_range_impl! {isize} + +// other Range with just Iterator +unindexed_range_impl! {u64, u64} +unindexed_range_impl! {i64, u64} +unindexed_range_impl! {u128, u128} +unindexed_range_impl! {i128, u128} + +// char is special because of the surrogate range hole +macro_rules! convert_char { + ( $self:ident . 
$method:ident ( $( $arg:expr ),* ) ) => {{ + let start = $self.range.start as u32; + let end = $self.range.end as u32; + if start < 0xD800 && 0xE000 < end { + // chain the before and after surrogate range fragments + (start..0xD800) + .into_par_iter() + .chain(0xE000..end) + .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) }) + .$method($( $arg ),*) + } else { + // no surrogate range to worry about + (start..end) + .into_par_iter() + .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) }) + .$method($( $arg ),*) + } + }}; +} + +impl ParallelIterator for Iter { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + convert_char!(self.drive(consumer)) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for Iter { + // Split at the surrogate range first if we're allowed to + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + convert_char!(self.drive(consumer)) + } + + fn len(&self) -> usize { + // Taken from ::steps_between + let start = self.range.start as u32; + let end = self.range.end as u32; + if start < end { + let mut count = end - start; + if start < 0xD800 && 0xE000 <= end { + count -= 0x800 + } + count as usize + } else { + 0 + } + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + convert_char!(self.with_producer(callback)) + } +} + +#[test] +fn check_range_split_at_overflow() { + // Note, this split index overflows i8! 
+ let producer = IterProducer { range: -100i8..100 }; + let (left, right) = producer.split_at(150); + let r1: i32 = left.range.map(i32::from).sum(); + let r2: i32 = right.range.map(i32::from).sum(); + assert_eq!(r1 + r2, -100); +} + +#[test] +fn test_i128_len_doesnt_overflow() { + use std::{i128, u128}; + + // Using parse because some versions of rust don't allow long literals + let octillion: i128 = "1000000000000000000000000000".parse().unwrap(); + let producer = IterProducer { + range: 0..octillion, + }; + + assert_eq!(octillion as u128, producer.range.len()); + assert_eq!(octillion as u128, (0..octillion).len()); + assert_eq!(2 * octillion as u128, (-octillion..octillion).len()); + + assert_eq!(u128::MAX, (i128::MIN..i128::MAX).len()); +} + +#[test] +fn test_u64_opt_len() { + use std::{u64, usize}; + assert_eq!(Some(100), (0..100u64).into_par_iter().opt_len()); + assert_eq!( + Some(usize::MAX), + (0..usize::MAX as u64).into_par_iter().opt_len() + ); + if (usize::MAX as u64) < u64::MAX { + assert_eq!( + None, + (0..(usize::MAX as u64).wrapping_add(1)) + .into_par_iter() + .opt_len() + ); + assert_eq!(None, (0..u64::MAX).into_par_iter().opt_len()); + } +} + +#[test] +fn test_u128_opt_len() { + use std::{u128, usize}; + assert_eq!(Some(100), (0..100u128).into_par_iter().opt_len()); + assert_eq!( + Some(usize::MAX), + (0..usize::MAX as u128).into_par_iter().opt_len() + ); + assert_eq!(None, (0..1 + usize::MAX as u128).into_par_iter().opt_len()); + assert_eq!(None, (0..u128::MAX).into_par_iter().opt_len()); +} + +// `usize as i64` can overflow, so make sure to wrap it appropriately +// when using the `opt_len` "indexed" mode. +#[test] +#[cfg(target_pointer_width = "64")] +fn test_usize_i64_overflow() { + use crate::ThreadPoolBuilder; + use std::i64; + + let iter = (-2..i64::MAX).into_par_iter(); + assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 2)); + + // always run with multiple threads to split into, or this will take forever... 
+ let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap(); + pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX - 1))); +} diff --git a/src/range_inclusive.rs b/src/range_inclusive.rs new file mode 100644 index 0000000..c802b6c --- /dev/null +++ b/src/range_inclusive.rs @@ -0,0 +1,288 @@ +//! Parallel iterator types for [inclusive ranges][std::range], +//! the type for values created by `a..=b` expressions +//! +//! You will rarely need to interact with this module directly unless you have +//! need to name one of the iterator types. +//! +//! ``` +//! use rayon::prelude::*; +//! +//! let r = (0..=100u64).into_par_iter() +//! .sum(); +//! +//! // compare result with sequential calculation +//! assert_eq!((0..=100).sum::(), r); +//! ``` +//! +//! [std::range]: https://doc.rust-lang.org/core/ops/struct.RangeInclusive.html + +use crate::iter::plumbing::*; +use crate::iter::*; +use std::char; +use std::ops::RangeInclusive; + +/// Parallel iterator over an inclusive range, implemented for all integer types. +/// +/// **Note:** The `zip` operation requires `IndexedParallelIterator` +/// which is only implemented for `u8`, `i8`, `u16`, and `i16`. +/// +/// ``` +/// use rayon::prelude::*; +/// +/// let p = (0..=25u16).into_par_iter() +/// .zip(0..=25u16) +/// .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0) +/// .map(|(x, y)| x * y) +/// .sum::(); +/// +/// let s = (0..=25u16).zip(0..=25u16) +/// .filter(|&(x, y)| x % 5 == 0 || y % 5 == 0) +/// .map(|(x, y)| x * y) +/// .sum(); +/// +/// assert_eq!(p, s); +/// ``` +#[derive(Debug, Clone)] +pub struct Iter { + range: RangeInclusive, +} + +impl Iter +where + RangeInclusive: Eq, + T: Ord + Copy, +{ + /// Returns `Some((start, end))` for `start..=end`, or `None` if it is exhausted. + /// + /// Note that `RangeInclusive` does not specify the bounds of an exhausted iterator, + /// so this is a way for us to figure out what we've got. 
Thankfully, all of the + /// integer types we care about can be trivially cloned. + fn bounds(&self) -> Option<(T, T)> { + let start = *self.range.start(); + let end = *self.range.end(); + if start <= end && self.range == (start..=end) { + // If the range is still nonempty, this is obviously true + // If the range is exhausted, either start > end or + // the range does not equal start..=end. + Some((start, end)) + } else { + None + } + } +} + +impl IntoParallelIterator for RangeInclusive +where + Iter: ParallelIterator, +{ + type Item = as ParallelIterator>::Item; + type Iter = Iter; + + fn into_par_iter(self) -> Self::Iter { + Iter { range: self } + } +} + +macro_rules! convert { + ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => { + if let Some((start, end)) = $self.bounds() { + if let Some(end) = end.checked_add(1) { + (start..end).into_par_iter().$method($( $arg ),*) + } else { + (start..end).into_par_iter().chain(once(end)).$method($( $arg ),*) + } + } else { + empty::().$method($( $arg ),*) + } + }; +} + +macro_rules! parallel_range_impl { + ( $t:ty ) => { + impl ParallelIterator for Iter<$t> { + type Item = $t; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + convert!(self.drive_unindexed(consumer)) + } + + fn opt_len(&self) -> Option { + convert!(self.opt_len()) + } + } + }; +} + +macro_rules! indexed_range_impl { + ( $t:ty ) => { + parallel_range_impl! { $t } + + impl IndexedParallelIterator for Iter<$t> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + convert!(self.drive(consumer)) + } + + fn len(&self) -> usize { + self.range.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + convert!(self.with_producer(callback)) + } + } + }; +} + +// all RangeInclusive with ExactSizeIterator +indexed_range_impl! {u8} +indexed_range_impl! {u16} +indexed_range_impl! {i8} +indexed_range_impl! 
{i16} + +// other RangeInclusive with just Iterator +parallel_range_impl! {usize} +parallel_range_impl! {isize} +parallel_range_impl! {u32} +parallel_range_impl! {i32} +parallel_range_impl! {u64} +parallel_range_impl! {i64} +parallel_range_impl! {u128} +parallel_range_impl! {i128} + +// char is special +macro_rules! convert_char { + ( $self:ident . $method:ident ( $( $arg:expr ),* ) ) => { + if let Some((start, end)) = $self.bounds() { + let start = start as u32; + let end = end as u32; + if start < 0xD800 && 0xE000 <= end { + // chain the before and after surrogate range fragments + (start..0xD800) + .into_par_iter() + .chain(0xE000..end + 1) // cannot use RangeInclusive, so add one to end + .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) }) + .$method($( $arg ),*) + } else { + // no surrogate range to worry about + (start..end + 1) // cannot use RangeInclusive, so add one to end + .into_par_iter() + .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) }) + .$method($( $arg ),*) + } + } else { + empty().into_par_iter().$method($( $arg ),*) + } + }; +} + +impl ParallelIterator for Iter { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + convert_char!(self.drive(consumer)) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +// Range is broken on 16 bit platforms, may as well benefit from it +impl IndexedParallelIterator for Iter { + // Split at the surrogate range first if we're allowed to + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + convert_char!(self.drive(consumer)) + } + + fn len(&self) -> usize { + if let Some((start, end)) = self.bounds() { + // Taken from ::steps_between + let start = start as u32; + let end = end as u32; + let mut count = end - start; + if start < 0xD800 && 0xE000 <= end { + count -= 0x800 + } + (count + 1) as usize // add one for inclusive + } else { + 0 + } + } + + fn with_producer(self, callback: CB) -> 
CB::Output + where + CB: ProducerCallback, + { + convert_char!(self.with_producer(callback)) + } +} + +#[test] +#[cfg(target_pointer_width = "64")] +fn test_u32_opt_len() { + use std::u32; + assert_eq!(Some(101), (0..=100u32).into_par_iter().opt_len()); + assert_eq!( + Some(u32::MAX as usize), + (0..=u32::MAX - 1).into_par_iter().opt_len() + ); + assert_eq!( + Some(u32::MAX as usize + 1), + (0..=u32::MAX).into_par_iter().opt_len() + ); +} + +#[test] +fn test_u64_opt_len() { + use std::{u64, usize}; + assert_eq!(Some(101), (0..=100u64).into_par_iter().opt_len()); + assert_eq!( + Some(usize::MAX), + (0..=usize::MAX as u64 - 1).into_par_iter().opt_len() + ); + assert_eq!(None, (0..=usize::MAX as u64).into_par_iter().opt_len()); + assert_eq!(None, (0..=u64::MAX).into_par_iter().opt_len()); +} + +#[test] +fn test_u128_opt_len() { + use std::{u128, usize}; + assert_eq!(Some(101), (0..=100u128).into_par_iter().opt_len()); + assert_eq!( + Some(usize::MAX), + (0..=usize::MAX as u128 - 1).into_par_iter().opt_len() + ); + assert_eq!(None, (0..=usize::MAX as u128).into_par_iter().opt_len()); + assert_eq!(None, (0..=u128::MAX).into_par_iter().opt_len()); +} + +// `usize as i64` can overflow, so make sure to wrap it appropriately +// when using the `opt_len` "indexed" mode. +#[test] +#[cfg(target_pointer_width = "64")] +fn test_usize_i64_overflow() { + use crate::ThreadPoolBuilder; + use std::i64; + + let iter = (-2..=i64::MAX).into_par_iter(); + assert_eq!(iter.opt_len(), Some(i64::MAX as usize + 3)); + + // always run with multiple threads to split into, or this will take forever... + let pool = ThreadPoolBuilder::new().num_threads(8).build().unwrap(); + pool.install(|| assert_eq!(iter.find_last(|_| true), Some(i64::MAX))); +} diff --git a/src/result.rs b/src/result.rs new file mode 100644 index 0000000..43685ca --- /dev/null +++ b/src/result.rs @@ -0,0 +1,132 @@ +//! Parallel iterator types for [results][std::result] +//! +//! 
You will rarely need to interact with this module directly unless you need
+//! to name one of the iterator types.
+//!
+//! [std::result]: https://doc.rust-lang.org/stable/std/result/
+
+use crate::iter::plumbing::*;
+use crate::iter::*;
+use std::sync::Mutex;
+
+use crate::option;
+
+/// Parallel iterator over a result
+#[derive(Debug, Clone)]
+pub struct IntoIter<T: Send> {
+    inner: option::IntoIter<T>,
+}
+
+impl<T: Send, E> IntoParallelIterator for Result<T, E> {
+    type Item = T;
+    type Iter = IntoIter<T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        // `ok()` discards the error, so an `Err` becomes an empty iterator.
+        IntoIter {
+            inner: self.ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IntoIter<T> => T,
+    impl<T: Send>
+}
+
+/// Parallel iterator over an immutable reference to a result
+#[derive(Debug)]
+pub struct Iter<'a, T: Sync> {
+    inner: option::IntoIter<&'a T>,
+}
+
+impl<'a, T: Sync> Clone for Iter<'a, T> {
+    fn clone(&self) -> Self {
+        Iter {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<'a, T: Sync, E> IntoParallelIterator for &'a Result<T, E> {
+    type Item = &'a T;
+    type Iter = Iter<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        Iter {
+            inner: self.as_ref().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    Iter<'a, T> => &'a T,
+    impl<'a, T: Sync + 'a>
+}
+
+/// Parallel iterator over a mutable reference to a result
+#[derive(Debug)]
+pub struct IterMut<'a, T: Send> {
+    inner: option::IntoIter<&'a mut T>,
+}
+
+impl<'a, T: Send, E> IntoParallelIterator for &'a mut Result<T, E> {
+    type Item = &'a mut T;
+    type Iter = IterMut<'a, T>;
+
+    fn into_par_iter(self) -> Self::Iter {
+        IterMut {
+            inner: self.as_mut().ok().into_par_iter(),
+        }
+    }
+}
+
+delegate_indexed_iterator! {
+    IterMut<'a, T> => &'a mut T,
+    impl<'a, T: Send + 'a>
+}
+
+/// Collect an arbitrary `Result`-wrapped collection.
+///
+/// If any item is `Err`, then all previous `Ok` items collected are
+/// discarded, and it returns that error. If there are multiple errors, the
+/// one returned is not deterministic.
+impl FromParallelIterator> for Result +where + C: FromParallelIterator, + T: Send, + E: Send, +{ + fn from_par_iter(par_iter: I) -> Self + where + I: IntoParallelIterator>, + { + fn ok(saved: &Mutex>) -> impl Fn(Result) -> Option + '_ { + move |item| match item { + Ok(item) => Some(item), + Err(error) => { + // We don't need a blocking `lock()`, as anybody + // else holding the lock will also be writing + // `Some(error)`, and then ours is irrelevant. + if let Ok(mut guard) = saved.try_lock() { + if guard.is_none() { + *guard = Some(error); + } + } + None + } + } + } + + let saved_error = Mutex::new(None); + let collection = par_iter + .into_par_iter() + .map(ok(&saved_error)) + .while_some() + .collect(); + + match saved_error.into_inner().unwrap() { + Some(error) => Err(error), + None => Ok(collection), + } + } +} diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs new file mode 100644 index 0000000..a007cae --- /dev/null +++ b/src/slice/mergesort.rs @@ -0,0 +1,763 @@ +//! Parallel merge sort. +//! +//! This implementation is copied verbatim from `std::slice::sort` and then parallelized. +//! The only difference from the original is that the sequential `mergesort` returns +//! `MergesortResult` and leaves descending arrays intact. + +use crate::iter::*; +use crate::slice::ParallelSliceMut; +use std::mem; +use std::mem::size_of; +use std::ptr; +use std::slice; + +unsafe fn get_and_increment(ptr: &mut *mut T) -> *mut T { + let old = *ptr; + *ptr = ptr.offset(1); + old +} + +unsafe fn decrement_and_get(ptr: &mut *mut T) -> *mut T { + *ptr = ptr.offset(-1); + *ptr +} + +/// When dropped, copies from `src` into `dest` a sequence of length `len`. +struct CopyOnDrop { + src: *mut T, + dest: *mut T, + len: usize, +} + +impl Drop for CopyOnDrop { + fn drop(&mut self) { + unsafe { + ptr::copy_nonoverlapping(self.src, self.dest, self.len); + } + } +} + +/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted. 
+/// +/// This is the integral subroutine of insertion sort. +fn insert_head(v: &mut [T], is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + if v.len() >= 2 && is_less(&v[1], &v[0]) { + unsafe { + // There are three ways to implement insertion here: + // + // 1. Swap adjacent elements until the first one gets to its final destination. + // However, this way we copy data around more than is necessary. If elements are big + // structures (costly to copy), this method will be slow. + // + // 2. Iterate until the right place for the first element is found. Then shift the + // elements succeeding it to make room for it and finally place it into the + // remaining hole. This is a good method. + // + // 3. Copy the first element into a temporary variable. Iterate until the right place + // for it is found. As we go along, copy every traversed element into the slot + // preceding it. Finally, copy data from the temporary variable into the remaining + // hole. This method is very good. Benchmarks demonstrated slightly better + // performance than with the 2nd method. + // + // All methods were benchmarked, and the 3rd showed best results. So we chose that one. + let mut tmp = NoDrop { + value: Some(ptr::read(&v[0])), + }; + + // Intermediate state of the insertion process is always tracked by `hole`, which + // serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. Fills the remaining hole in `v` in the end. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and + // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it + // initially held exactly once. 
+ let mut hole = InsertionHole { + src: tmp.value.as_mut().unwrap(), + dest: &mut v[1], + }; + ptr::copy_nonoverlapping(&v[1], &mut v[0], 1); + + for i in 2..v.len() { + if !is_less(&v[i], tmp.value.as_ref().unwrap()) { + break; + } + ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1); + hole.dest = &mut v[i]; + } + // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. + } + } + + // Holds a value, but never drops it. + struct NoDrop { + value: Option, + } + + impl Drop for NoDrop { + fn drop(&mut self) { + mem::forget(self.value.take()); + } + } + + // When dropped, copies from `src` into `dest`. + struct InsertionHole { + src: *mut T, + dest: *mut T, + } + + impl Drop for InsertionHole { + fn drop(&mut self) { + unsafe { + ptr::copy_nonoverlapping(self.src, self.dest, 1); + } + } + } +} + +/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and +/// stores the result into `v[..]`. +/// +/// # Safety +/// +/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough +/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type. +unsafe fn merge(v: &mut [T], mid: usize, buf: *mut T, is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + let len = v.len(); + let v = v.as_mut_ptr(); + let v_mid = v.add(mid); + let v_end = v.add(len); + + // The merge process first copies the shorter run into `buf`. Then it traces the newly copied + // run and the longer run forwards (or backwards), comparing their next unconsumed elements and + // copying the lesser (or greater) one into `v`. + // + // As soon as the shorter run is fully consumed, the process is done. If the longer run gets + // consumed first, then we must copy whatever is left of the shorter run into the remaining + // hole in `v`. + // + // Intermediate state of the process is always tracked by `hole`, which serves two purposes: + // 1. Protects integrity of `v` from panics in `is_less`. + // 2. 
Fills the remaining hole in `v` if the longer run gets consumed first. + // + // Panic safety: + // + // If `is_less` panics at any point during the process, `hole` will get dropped and fill the + // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every + // object it initially held exactly once. + let mut hole; + + if mid <= len - mid { + // The left run is shorter. + ptr::copy_nonoverlapping(v, buf, mid); + hole = MergeHole { + start: buf, + end: buf.add(mid), + dest: v, + }; + + // Initially, these pointers point to the beginnings of their arrays. + let left = &mut hole.start; + let mut right = v_mid; + let out = &mut hole.dest; + + while *left < hole.end && right < v_end { + // Consume the lesser side. + // If equal, prefer the left run to maintain stability. + let to_copy = if is_less(&*right, &**left) { + get_and_increment(&mut right) + } else { + get_and_increment(left) + }; + ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1); + } + } else { + // The right run is shorter. + ptr::copy_nonoverlapping(v_mid, buf, len - mid); + hole = MergeHole { + start: buf, + end: buf.add(len - mid), + dest: v_mid, + }; + + // Initially, these pointers point past the ends of their arrays. + let left = &mut hole.dest; + let right = &mut hole.end; + let mut out = v_end; + + while v < *left && buf < *right { + // Consume the greater side. + // If equal, prefer the right run to maintain stability. + let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) { + decrement_and_get(left) + } else { + decrement_and_get(right) + }; + ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1); + } + } + // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of + // it will now be copied into the hole in `v`. + + // When dropped, copies the range `start..end` into `dest..`. 
+ struct MergeHole { + start: *mut T, + end: *mut T, + dest: *mut T, + } + + impl Drop for MergeHole { + fn drop(&mut self) { + // `T` is not a zero-sized type, so it's okay to divide by its size. + let len = (self.end as usize - self.start as usize) / size_of::(); + unsafe { + ptr::copy_nonoverlapping(self.start, self.dest, len); + } + } + } +} + +/// The result of merge sort. +#[must_use] +#[derive(Clone, Copy, PartialEq, Eq)] +enum MergesortResult { + /// The slice has already been sorted. + NonDescending, + /// The slice has been descending and therefore it was left intact. + Descending, + /// The slice was sorted. + Sorted, +} + +/// A sorted run that starts at index `start` and is of length `len`. +#[derive(Clone, Copy)] +struct Run { + start: usize, + len: usize, +} + +/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically, +/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the +/// algorithm should continue building a new run instead, `None` is returned. +/// +/// TimSort is infamous for its buggy implementations, as described here: +/// http://envisage-project.eu/timsort-specification-and-verification/ +/// +/// The gist of the story is: we must enforce the invariants on the top four runs on the stack. +/// Enforcing them on just top three is not sufficient to ensure that the invariants will still +/// hold for *all* runs in the stack. +/// +/// This function correctly checks invariants for the top four runs. Additionally, if the top +/// run starts at index 0, it will always demand a merge operation until the stack is fully +/// collapsed, in order to complete the sort. 
+#[inline]
+fn collapse(runs: &[Run]) -> Option<usize> {
+    let n = runs.len();
+
+    if n >= 2
+        && (runs[n - 1].start == 0
+            || runs[n - 2].len <= runs[n - 1].len
+            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
+            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
+    {
+        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
+            Some(n - 3)
+        } else {
+            Some(n - 2)
+        }
+    } else {
+        None
+    }
+}
+
+/// Sorts a slice using merge sort, unless it is already in descending order.
+///
+/// This function doesn't modify the slice if it is already non-descending or descending.
+/// Otherwise, it sorts the slice into non-descending order.
+///
+/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
+/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt).
+///
+/// The algorithm identifies strictly descending and non-descending subsequences, which are called
+/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
+/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
+/// satisfied:
+///
+/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
+/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
+///
+/// The invariants ensure that the total running time is `O(n log n)` worst-case.
+///
+/// # Safety
+///
+/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
+unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
+where
+    T: Send,
+    F: Fn(&T, &T) -> bool + Sync,
+{
+    // Very short runs are extended using insertion sort to span at least this many elements.
+    const MIN_RUN: usize = 10;
+
+    let len = v.len();
+
+    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
+    // strange decision, but consider the fact that merges more often go in the opposite direction
+    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
+    // backwards. To conclude, identifying runs by traversing backwards improves performance.
+    let mut runs = vec![];
+    let mut end = len;
+    while end > 0 {
+        // Find the next natural run, and reverse it if it's strictly descending.
+        let mut start = end - 1;
+
+        if start > 0 {
+            start -= 1;
+
+            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
+                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this descending run covers the whole slice, return immediately.
+                if start == 0 && end == len {
+                    return MergesortResult::Descending;
+                } else {
+                    v[start..end].reverse();
+                }
+            } else {
+                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                    start -= 1;
+                }
+
+                // If this non-descending run covers the whole slice, return immediately.
+                if end - start == len {
+                    return MergesortResult::NonDescending;
+                }
+            }
+        }
+
+        // Insert some more elements into the run if it's too short. Insertion sort is faster than
+        // merge sort on short sequences, so this significantly improves performance.
+        while start > 0 && end - start < MIN_RUN {
+            start -= 1;
+            insert_head(&mut v[start..end], &is_less);
+        }
+
+        // Push this run onto the stack.
+        runs.push(Run {
+            start,
+            len: end - start,
+        });
+        end = start;
+
+        // Merge some pairs of adjacent runs to satisfy the invariants.
+        while let Some(r) = collapse(&runs) {
+            // Runs are discovered back to front, so the later stack entry
+            // (`r + 1`) is the one that sits further left in `v`.
+            let left = runs[r + 1];
+            let right = runs[r];
+            merge(
+                &mut v[left.start..right.start + right.len],
+                left.len,
+                buf,
+                &is_less,
+            );
+
+            runs[r] = Run {
+                start: left.start,
+                len: left.len + right.len,
+            };
+            runs.remove(r + 1);
+        }
+    }
+
+    // Finally, exactly one run must remain in the stack.
+    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+
+    // The original order of the slice was neither non-descending nor descending.
+    MergesortResult::Sorted
+}
+
+////////////////////////////////////////////////////////////////////////////
+// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
+// Everything below this line is parallelization.
+////////////////////////////////////////////////////////////////////////////
+
+/// Splits two sorted slices so that they can be merged in parallel.
+///
+/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
+/// `left[a..]` and `right[b..]`.
+fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
+where
+    F: Fn(&T, &T) -> bool,
+{
+    let left_len = left.len();
+    let right_len = right.len();
+
+    if left_len >= right_len {
+        let left_mid = left_len / 2;
+
+        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
+        let mut a = 0;
+        let mut b = right_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[m], &left[left_mid]) {
+                a = m + 1;
+            } else {
+                b = m;
+            }
+        }
+
+        (left_mid, a)
+    } else {
+        let right_mid = right_len / 2;
+
+        // Find the first element in `left` that is greater than `right[right_mid]`.
+        let mut a = 0;
+        let mut b = left_len;
+        while a < b {
+            let m = a + (b - a) / 2;
+            if is_less(&right[right_mid], &left[m]) {
+                b = m;
+            } else {
+                a = m + 1;
+            }
+        }
+
+        (a, right_mid)
+    }
+}
+
+/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
+///
+/// # Safety
+///
+/// The `dest` pointer must have enough space to store the result.
+///
+/// Even if `is_less` panics at any point during the merge process, this function will fully copy
+/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
+unsafe fn par_merge(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F) +where + T: Send, + F: Fn(&T, &T) -> bool + Sync, +{ + // Slices whose lengths sum up to this value are merged sequentially. This number is slightly + // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so + // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task + // scheduling. + const MAX_SEQUENTIAL: usize = 5000; + + let left_len = left.len(); + let right_len = right.len(); + + // Intermediate state of the merge process, which serves two purposes: + // 1. Protects integrity of `dest` from panics in `is_less`. + // 2. Copies the remaining elements as soon as one of the two sides is exhausted. + // + // Panic safety: + // + // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the + // remaining parts of `left` and `right` into `dest`. + let mut s = State { + left_start: left.as_mut_ptr(), + left_end: left.as_mut_ptr().add(left_len), + right_start: right.as_mut_ptr(), + right_end: right.as_mut_ptr().add(right_len), + dest, + }; + + if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL { + while s.left_start < s.left_end && s.right_start < s.right_end { + // Consume the lesser side. + // If equal, prefer the left run to maintain stability. + let to_copy = if is_less(&*s.right_start, &*s.left_start) { + get_and_increment(&mut s.right_start) + } else { + get_and_increment(&mut s.left_start) + }; + ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1); + } + } else { + // Function `split_for_merge` might panic. If that happens, `s` will get destructed and copy + // the whole `left` and `right` into `dest`. + let (left_mid, right_mid) = split_for_merge(left, right, is_less); + let (left_l, left_r) = left.split_at_mut(left_mid); + let (right_l, right_r) = right.split_at_mut(right_mid); + + // Prevent the destructor of `s` from running. 
Rayon will ensure that both calls to + // `par_merge` happen. If one of the two calls panics, they will ensure that elements still + // get copied into `dest_left` and `dest_right``. + mem::forget(s); + + // Convert the pointers to `usize` because `*mut T` is not `Send`. + let dest_l = dest as usize; + let dest_r = dest.add(left_l.len() + right_l.len()) as usize; + rayon_core::join( + || par_merge(left_l, right_l, dest_l as *mut T, is_less), + || par_merge(left_r, right_r, dest_r as *mut T, is_less), + ); + } + // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements + // all at once. + + // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`, + // in that order. + struct State { + left_start: *mut T, + left_end: *mut T, + right_start: *mut T, + right_end: *mut T, + dest: *mut T, + } + + impl Drop for State { + fn drop(&mut self) { + let size = size_of::(); + let left_len = (self.left_end as usize - self.left_start as usize) / size; + let right_len = (self.right_end as usize - self.right_start as usize) / size; + + // Copy array `left`, followed by `right`. + unsafe { + ptr::copy_nonoverlapping(self.left_start, self.dest, left_len); + self.dest = self.dest.add(left_len); + ptr::copy_nonoverlapping(self.right_start, self.dest, right_len); + } + } + } +} + +/// Recursively merges pre-sorted chunks inside `v`. +/// +/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound). +/// Argument `buf` is an auxiliary buffer that will be used during the procedure. +/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`. +/// +/// # Safety +/// +/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk +/// must equal the left bound of the following chunk. +/// +/// The buffer must be at least as long as `v`. 
+unsafe fn recurse( + v: *mut T, + buf: *mut T, + chunks: &[(usize, usize)], + into_buf: bool, + is_less: &F, +) where + T: Send, + F: Fn(&T, &T) -> bool + Sync, +{ + let len = chunks.len(); + debug_assert!(len > 0); + + // Base case of the algorithm. + // If only one chunk is remaining, there's no more work to split and merge. + if len == 1 { + if into_buf { + // Copy the chunk from `v` into `buf`. + let (start, end) = chunks[0]; + let src = v.add(start); + let dest = buf.add(start); + ptr::copy_nonoverlapping(src, dest, end - start); + } + return; + } + + // Split the chunks into two halves. + let (start, _) = chunks[0]; + let (mid, _) = chunks[len / 2]; + let (_, end) = chunks[len - 1]; + let (left, right) = chunks.split_at(len / 2); + + // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from + // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll + // merge chunks from `v` into `buf`, and viceversa. + // + // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge` + // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second + // level etc. + let (src, dest) = if into_buf { (v, buf) } else { (buf, v) }; + + // Panic safety: + // + // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will + // be executed, thus copying everything from `src` into `dest`. This way we ensure that all + // chunks are in fact copied into `dest`, even if the merge process doesn't finish. + let guard = CopyOnDrop { + src: src.add(start), + dest: dest.add(start), + len: end - start, + }; + + // Convert the pointers to `usize` because `*mut T` is not `Send`. 
+ let v = v as usize; + let buf = buf as usize; + rayon_core::join( + || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less), + || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less), + ); + + // Everything went all right - recursive calls didn't panic. + // Forget the guard in order to prevent its destructor from running. + mem::forget(guard); + + // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`. + let src_left = slice::from_raw_parts_mut(src.add(start), mid - start); + let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid); + par_merge(src_left, src_right, dest.add(start), is_less); +} + +/// Sorts `v` using merge sort in parallel. +/// +/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case. +/// The allocated temporary buffer is of the same length as is `v`. +pub(super) fn par_mergesort(v: &mut [T], is_less: F) +where + T: Send, + F: Fn(&T, &T) -> bool + Sync, +{ + // Slices of up to this length get sorted using insertion sort in order to avoid the cost of + // buffer allocation. + const MAX_INSERTION: usize = 20; + // The length of initial chunks. This number is as small as possible but so that the overhead + // of Rayon's task scheduling is still negligible. + const CHUNK_LENGTH: usize = 2000; + + // Sorting has no meaningful behavior on zero-sized types. + if size_of::() == 0 { + return; + } + + let len = v.len(); + + // Short slices get sorted in-place via insertion sort to avoid allocations. + if len <= MAX_INSERTION { + if len >= 2 { + for i in (0..len - 1).rev() { + insert_head(&mut v[i..], &is_less); + } + } + return; + } + + // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it + // shallow copies of the contents of `v` without risking the dtors running on copies if + // `is_less` panics. 
+ let mut buf = Vec::::with_capacity(len); + let buf = buf.as_mut_ptr(); + + // If the slice is not longer than one chunk would be, do sequential merge sort and return. + if len <= CHUNK_LENGTH { + let res = unsafe { mergesort(v, buf, &is_less) }; + if res == MergesortResult::Descending { + v.reverse(); + } + return; + } + + // Split the slice into chunks and merge sort them in parallel. + // However, descending chunks will not be sorted - they will be simply left intact. + let mut iter = { + // Convert the pointer to `usize` because `*mut T` is not `Send`. + let buf = buf as usize; + + v.par_chunks_mut(CHUNK_LENGTH) + .with_max_len(1) + .enumerate() + .map(|(i, chunk)| { + let l = CHUNK_LENGTH * i; + let r = l + chunk.len(); + unsafe { + let buf = (buf as *mut T).add(l); + (l, r, mergesort(chunk, buf, &is_less)) + } + }) + .collect::>() + .into_iter() + .peekable() + }; + + // Now attempt to concatenate adjacent chunks that were left intact. + let mut chunks = Vec::with_capacity(iter.len()); + + while let Some((a, mut b, res)) = iter.next() { + // If this chunk was not modified by the sort procedure... + if res != MergesortResult::Sorted { + while let Some(&(x, y, r)) = iter.peek() { + // If the following chunk is of the same type and can be concatenated... + if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) { + // Concatenate them. + b = y; + iter.next(); + } else { + break; + } + } + } + + // Descending chunks must be reversed. + if res == MergesortResult::Descending { + v[a..b].reverse(); + } + + chunks.push((a, b)); + } + + // All chunks are properly sorted. + // Now we just have to merge them together. 
+ unsafe { + recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less); + } +} + +#[cfg(test)] +mod tests { + use super::split_for_merge; + use rand::distributions::Uniform; + use rand::{thread_rng, Rng}; + + #[test] + fn test_split_for_merge() { + fn check(left: &[u32], right: &[u32]) { + let (l, r) = split_for_merge(left, right, &|&a, &b| a < b); + assert!(left[..l] + .iter() + .all(|&x| right[r..].iter().all(|&y| x <= y))); + assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y))); + } + + check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]); + check(&[1, 2, 2, 2, 2, 3], &[]); + check(&[], &[1, 2, 2, 2, 2, 3]); + + let mut rng = thread_rng(); + + for _ in 0..100 { + let limit: u32 = rng.gen_range(1, 21); + let left_len: usize = rng.gen_range(0, 20); + let right_len: usize = rng.gen_range(0, 20); + + let mut left = rng + .sample_iter(&Uniform::new(0, limit)) + .take(left_len) + .collect::>(); + let mut right = rng + .sample_iter(&Uniform::new(0, limit)) + .take(right_len) + .collect::>(); + + left.sort(); + right.sort(); + check(&left, &right); + } + } +} diff --git a/src/slice/mod.rs b/src/slice/mod.rs new file mode 100644 index 0000000..b80125f --- /dev/null +++ b/src/slice/mod.rs @@ -0,0 +1,1203 @@ +//! Parallel iterator types for [slices][std::slice] +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/ + +mod mergesort; +mod quicksort; + +mod test; + +use self::mergesort::par_mergesort; +use self::quicksort::par_quicksort; +use crate::iter::plumbing::*; +use crate::iter::*; +use crate::split_producer::*; +use std::cmp; +use std::cmp::Ordering; +use std::fmt::{self, Debug}; + +use super::math::div_round_up; + +/// Parallel extensions for slices. +pub trait ParallelSlice { + /// Returns a plain slice, which is used to implement the rest of the + /// parallel methods. 
+ fn as_parallel_slice(&self) -> &[T]; + + /// Returns a parallel iterator over subslices separated by elements that + /// match the separator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let smallest = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9] + /// .par_split(|i| *i == 0) + /// .map(|numbers| numbers.iter().min().unwrap()) + /// .min(); + /// assert_eq!(Some(&1), smallest); + /// ``` + fn par_split

(&self, separator: P) -> Split<'_, T, P> + where + P: Fn(&T) -> bool + Sync + Send, + { + Split { + slice: self.as_parallel_slice(), + separator, + } + } + + /// Returns a parallel iterator over all contiguous windows of length + /// `window_size`. The windows overlap. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect(); + /// assert_eq!(vec![[1, 2], [2, 3]], windows); + /// ``` + fn par_windows(&self, window_size: usize) -> Windows<'_, T> { + Windows { + window_size, + slice: self.as_parallel_slice(), + } + } + + /// Returns a parallel iterator over at most `chunk_size` elements of + /// `self` at a time. The chunks do not overlap. + /// + /// If the number of elements in the iterator is not divisible by + /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All + /// other chunks will have that exact length. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect(); + /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]); + /// ``` + fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> { + assert!(chunk_size != 0, "chunk_size must not be zero"); + Chunks { + chunk_size, + slice: self.as_parallel_slice(), + } + } + + /// Returns a parallel iterator over `chunk_size` elements of + /// `self` at a time. The chunks do not overlap. + /// + /// If `chunk_size` does not divide the length of the slice, then the + /// last up to `chunk_size-1` elements will be omitted and can be + /// retrieved from the remainder function of the iterator. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect(); + /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]); + /// ``` + fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> { + assert!(chunk_size != 0, "chunk_size must not be zero"); + let slice = self.as_parallel_slice(); + let rem = slice.len() % chunk_size; + let len = slice.len() - rem; + let (fst, snd) = slice.split_at(len); + ChunksExact { + chunk_size, + slice: fst, + rem: snd, + } + } +} + +impl ParallelSlice for [T] { + #[inline] + fn as_parallel_slice(&self) -> &[T] { + self + } +} + +/// Parallel extensions for mutable slices. +pub trait ParallelSliceMut { + /// Returns a plain mutable slice, which is used to implement the rest of + /// the parallel methods. + fn as_parallel_slice_mut(&mut self) -> &mut [T]; + + /// Returns a parallel iterator over mutable subslices separated by + /// elements that match the separator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]; + /// array.par_split_mut(|i| *i == 0) + /// .for_each(|slice| slice.reverse()); + /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]); + /// ``` + fn par_split_mut

(&mut self, separator: P) -> SplitMut<'_, T, P> + where + P: Fn(&T) -> bool + Sync + Send, + { + SplitMut { + slice: self.as_parallel_slice_mut(), + separator, + } + } + + /// Returns a parallel iterator over at most `chunk_size` elements of + /// `self` at a time. The chunks are mutable and do not overlap. + /// + /// If the number of elements in the iterator is not divisible by + /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All + /// other chunks will have that exact length. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let mut array = [1, 2, 3, 4, 5]; + /// array.par_chunks_mut(2) + /// .for_each(|slice| slice.reverse()); + /// assert_eq!(array, [2, 1, 4, 3, 5]); + /// ``` + fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> { + assert!(chunk_size != 0, "chunk_size must not be zero"); + ChunksMut { + chunk_size, + slice: self.as_parallel_slice_mut(), + } + } + + /// Returns a parallel iterator over `chunk_size` elements of + /// `self` at a time. The chunks are mutable and do not overlap. + /// + /// If `chunk_size` does not divide the length of the slice, then the + /// last up to `chunk_size-1` elements will be omitted and can be + /// retrieved from the remainder function of the iterator. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let mut array = [1, 2, 3, 4, 5]; + /// array.par_chunks_exact_mut(3) + /// .for_each(|slice| slice.reverse()); + /// assert_eq!(array, [3, 2, 1, 4, 5]); + /// ``` + fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> { + assert!(chunk_size != 0, "chunk_size must not be zero"); + let slice = self.as_parallel_slice_mut(); + let rem = slice.len() % chunk_size; + let len = slice.len() - rem; + let (fst, snd) = slice.split_at_mut(len); + ChunksExactMut { + chunk_size, + slice: fst, + rem: snd, + } + } + + /// Sorts the slice in parallel. + /// + /// This sort is stable (i.e. 
does not reorder equal elements) and `O(n log n)` worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`par_sort_unstable`](#method.par_sort_unstable). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage the same size as `self`, but for very short slices a + /// non-allocating insertion sort is used instead. + /// + /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and + /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending + /// or descending runs are concatenated. Finally, the remaining chunks are merged together using + /// parallel subdivision of chunks and parallel merge operation. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [-5, 4, 1, -3, 2]; + /// + /// v.par_sort(); + /// assert_eq!(v, [-5, -3, 1, 2, 4]); + /// ``` + fn par_sort(&mut self) + where + T: Ord, + { + par_mergesort(self.as_parallel_slice_mut(), T::lt); + } + + /// Sorts the slice in parallel with a comparator function. + /// + /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). 
+ /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage the same size as `self`, but for very short slices a + /// non-allocating insertion sort is used instead. + /// + /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and + /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending + /// or descending runs are concatenated. Finally, the remaining chunks are merged together using + /// parallel subdivision of chunks and parallel merge operation. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [5, 4, 1, 3, 2]; + /// v.par_sort_by(|a, b| a.cmp(b)); + /// assert_eq!(v, [1, 2, 3, 4, 5]); + /// + /// // reverse sorting + /// v.par_sort_by(|a, b| b.cmp(a)); + /// assert_eq!(v, [5, 4, 3, 2, 1]); + /// ``` + fn par_sort_by(&mut self, compare: F) + where + F: Fn(&T, &T) -> Ordering + Sync, + { + par_mergesort(self.as_parallel_slice_mut(), |a, b| { + compare(a, b) == Ordering::Less + }); + } + + /// Sorts the slice in parallel with a key extraction function. + /// + /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. 
+ /// + /// Also, it allocates temporary storage the same size as `self`, but for very short slices a + /// non-allocating insertion sort is used instead. + /// + /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and + /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending + /// or descending runs are concatenated. Finally, the remaining chunks are merged together using + /// parallel subdivision of chunks and parallel merge operation. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [-5i32, 4, 1, -3, 2]; + /// + /// v.par_sort_by_key(|k| k.abs()); + /// assert_eq!(v, [1, 2, -3, 4, -5]); + /// ``` + fn par_sort_by_key(&mut self, f: F) + where + B: Ord, + F: Fn(&T) -> B + Sync, + { + par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b))); + } + + /// Sorts the slice in parallel, but may not preserve the order of equal elements. + /// + /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate), + /// and `O(n log n)` worst-case. + /// + /// # Current implementation + /// + /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort], + /// which is a quicksort variant designed to be very fast on certain kinds of patterns, + /// sometimes achieving linear time. It is randomized but deterministic, and falls back to + /// heapsort on degenerate inputs. + /// + /// It is generally faster than stable sorting, except in a few special cases, e.g. when the + /// slice consists of several concatenated sorted sequences. + /// + /// All quicksorts work in two stages: partitioning into two halves followed by recursive + /// calls. The partitioning phase is sequential, but the two recursive calls are performed in + /// parallel. 
+ /// + /// [pdqsort]: https://github.com/orlp/pdqsort + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [-5, 4, 1, -3, 2]; + /// + /// v.par_sort_unstable(); + /// assert_eq!(v, [-5, -3, 1, 2, 4]); + /// ``` + fn par_sort_unstable(&mut self) + where + T: Ord, + { + par_quicksort(self.as_parallel_slice_mut(), T::lt); + } + + /// Sorts the slice in parallel with a comparator function, but may not preserve the order of + /// equal elements. + /// + /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate), + /// and `O(n log n)` worst-case. + /// + /// # Current implementation + /// + /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort], + /// which is a quicksort variant designed to be very fast on certain kinds of patterns, + /// sometimes achieving linear time. It is randomized but deterministic, and falls back to + /// heapsort on degenerate inputs. + /// + /// It is generally faster than stable sorting, except in a few special cases, e.g. when the + /// slice consists of several concatenated sorted sequences. + /// + /// All quicksorts work in two stages: partitioning into two halves followed by recursive + /// calls. The partitioning phase is sequential, but the two recursive calls are performed in + /// parallel. 
+ /// + /// [pdqsort]: https://github.com/orlp/pdqsort + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [5, 4, 1, 3, 2]; + /// v.par_sort_unstable_by(|a, b| a.cmp(b)); + /// assert_eq!(v, [1, 2, 3, 4, 5]); + /// + /// // reverse sorting + /// v.par_sort_unstable_by(|a, b| b.cmp(a)); + /// assert_eq!(v, [5, 4, 3, 2, 1]); + /// ``` + fn par_sort_unstable_by(&mut self, compare: F) + where + F: Fn(&T, &T) -> Ordering + Sync, + { + par_quicksort(self.as_parallel_slice_mut(), |a, b| { + compare(a, b) == Ordering::Less + }); + } + + /// Sorts the slice in parallel with a key extraction function, but may not preserve the order + /// of equal elements. + /// + /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate), + /// and `O(n log n)` worst-case. + /// + /// # Current implementation + /// + /// The current algorithm is based on Orson Peters' [pattern-defeating quicksort][pdqsort], + /// which is a quicksort variant designed to be very fast on certain kinds of patterns, + /// sometimes achieving linear time. It is randomized but deterministic, and falls back to + /// heapsort on degenerate inputs. + /// + /// It is generally faster than stable sorting, except in a few special cases, e.g. when the + /// slice consists of several concatenated sorted sequences. + /// + /// All quicksorts work in two stages: partitioning into two halves followed by recursive + /// calls. The partitioning phase is sequential, but the two recursive calls are performed in + /// parallel. 
+ /// + /// [pdqsort]: https://github.com/orlp/pdqsort + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let mut v = [-5i32, 4, 1, -3, 2]; + /// + /// v.par_sort_unstable_by_key(|k| k.abs()); + /// assert_eq!(v, [1, 2, -3, 4, -5]); + /// ``` + fn par_sort_unstable_by_key(&mut self, f: F) + where + B: Ord, + F: Fn(&T) -> B + Sync, + { + par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b))); + } +} + +impl ParallelSliceMut for [T] { + #[inline] + fn as_parallel_slice_mut(&mut self) -> &mut [T] { + self + } +} + +impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] { + type Item = &'data T; + type Iter = Iter<'data, T>; + + fn into_par_iter(self) -> Self::Iter { + Iter { slice: self } + } +} + +impl<'data, T: Sync + 'data> IntoParallelIterator for &'data Vec { + type Item = &'data T; + type Iter = Iter<'data, T>; + + fn into_par_iter(self) -> Self::Iter { + Iter { slice: self } + } +} + +impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] { + type Item = &'data mut T; + type Iter = IterMut<'data, T>; + + fn into_par_iter(self) -> Self::Iter { + IterMut { slice: self } + } +} + +impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut Vec { + type Item = &'data mut T; + type Iter = IterMut<'data, T>; + + fn into_par_iter(self) -> Self::Iter { + IterMut { slice: self } + } +} + +/// Parallel iterator over immutable items in a slice +#[derive(Debug)] +pub struct Iter<'data, T: Sync> { + slice: &'data [T], +} + +impl<'data, T: Sync> Clone for Iter<'data, T> { + fn clone(&self) -> Self { + Iter { ..*self } + } +} + +impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> { + type Item = &'data T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> { + fn drive(self, consumer: C) -> 
C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.slice.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(IterProducer { slice: self.slice }) + } +} + +struct IterProducer<'data, T: Sync> { + slice: &'data [T], +} + +impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> { + type Item = &'data T; + type IntoIter = ::std::slice::Iter<'data, T>; + + fn into_iter(self) -> Self::IntoIter { + self.slice.iter() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.slice.split_at(index); + (IterProducer { slice: left }, IterProducer { slice: right }) + } +} + +/// Parallel iterator over immutable non-overlapping chunks of a slice +#[derive(Debug)] +pub struct Chunks<'data, T: Sync> { + chunk_size: usize, + slice: &'data [T], +} + +impl<'data, T: Sync> Clone for Chunks<'data, T> { + fn clone(&self) -> Self { + Chunks { ..*self } + } +} + +impl<'data, T: Sync + 'data> ParallelIterator for Chunks<'data, T> { + type Item = &'data [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Sync + 'data> IndexedParallelIterator for Chunks<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + div_round_up(self.slice.len(), self.chunk_size) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(ChunksProducer { + chunk_size: self.chunk_size, + slice: self.slice, + }) + } +} + +struct ChunksProducer<'data, T: Sync> { + chunk_size: usize, + slice: &'data [T], +} + +impl<'data, T: 'data + Sync> Producer for ChunksProducer<'data, T> { + type Item = &'data [T]; + type IntoIter = ::std::slice::Chunks<'data, T>; + + fn into_iter(self) -> 
Self::IntoIter { + self.slice.chunks(self.chunk_size) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = cmp::min(index * self.chunk_size, self.slice.len()); + let (left, right) = self.slice.split_at(elem_index); + ( + ChunksProducer { + chunk_size: self.chunk_size, + slice: left, + }, + ChunksProducer { + chunk_size: self.chunk_size, + slice: right, + }, + ) + } +} + +/// Parallel iterator over immutable non-overlapping chunks of a slice +#[derive(Debug)] +pub struct ChunksExact<'data, T: Sync> { + chunk_size: usize, + slice: &'data [T], + rem: &'data [T], +} + +impl<'data, T: Sync> ChunksExact<'data, T> { + /// Return the remainder of the original slice that is not going to be + /// returned by the iterator. The returned slice has at most `chunk_size-1` + /// elements. + pub fn remainder(&self) -> &'data [T] { + self.rem + } +} + +impl<'data, T: Sync> Clone for ChunksExact<'data, T> { + fn clone(&self) -> Self { + ChunksExact { ..*self } + } +} + +impl<'data, T: Sync + 'data> ParallelIterator for ChunksExact<'data, T> { + type Item = &'data [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Sync + 'data> IndexedParallelIterator for ChunksExact<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.slice.len() / self.chunk_size + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(ChunksExactProducer { + chunk_size: self.chunk_size, + slice: self.slice, + }) + } +} + +struct ChunksExactProducer<'data, T: Sync> { + chunk_size: usize, + slice: &'data [T], +} + +impl<'data, T: 'data + Sync> Producer for ChunksExactProducer<'data, T> { + type Item = &'data [T]; + type IntoIter = ::std::slice::ChunksExact<'data, T>; + + fn into_iter(self) -> 
Self::IntoIter { + self.slice.chunks_exact(self.chunk_size) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = index * self.chunk_size; + let (left, right) = self.slice.split_at(elem_index); + ( + ChunksExactProducer { + chunk_size: self.chunk_size, + slice: left, + }, + ChunksExactProducer { + chunk_size: self.chunk_size, + slice: right, + }, + ) + } +} + +/// Parallel iterator over immutable overlapping windows of a slice +#[derive(Debug)] +pub struct Windows<'data, T: Sync> { + window_size: usize, + slice: &'data [T], +} + +impl<'data, T: Sync> Clone for Windows<'data, T> { + fn clone(&self) -> Self { + Windows { ..*self } + } +} + +impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> { + type Item = &'data [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + assert!(self.window_size >= 1); + self.slice.len().saturating_sub(self.window_size - 1) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(WindowsProducer { + window_size: self.window_size, + slice: self.slice, + }) + } +} + +struct WindowsProducer<'data, T: Sync> { + window_size: usize, + slice: &'data [T], +} + +impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> { + type Item = &'data [T]; + type IntoIter = ::std::slice::Windows<'data, T>; + + fn into_iter(self) -> Self::IntoIter { + self.slice.windows(self.window_size) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let left_index = cmp::min(self.slice.len(), index + (self.window_size - 1)); + let left = &self.slice[..left_index]; + let right = &self.slice[index..]; + ( + WindowsProducer 
{ + window_size: self.window_size, + slice: left, + }, + WindowsProducer { + window_size: self.window_size, + slice: right, + }, + ) + } +} + +/// Parallel iterator over mutable items in a slice +#[derive(Debug)] +pub struct IterMut<'data, T: Send> { + slice: &'data mut [T], +} + +impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> { + type Item = &'data mut T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.slice.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(IterMutProducer { slice: self.slice }) + } +} + +struct IterMutProducer<'data, T: Send> { + slice: &'data mut [T], +} + +impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> { + type Item = &'data mut T; + type IntoIter = ::std::slice::IterMut<'data, T>; + + fn into_iter(self) -> Self::IntoIter { + self.slice.iter_mut() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.slice.split_at_mut(index); + ( + IterMutProducer { slice: left }, + IterMutProducer { slice: right }, + ) + } +} + +/// Parallel iterator over mutable non-overlapping chunks of a slice +#[derive(Debug)] +pub struct ChunksMut<'data, T: Send> { + chunk_size: usize, + slice: &'data mut [T], +} + +impl<'data, T: Send + 'data> ParallelIterator for ChunksMut<'data, T> { + type Item = &'data mut [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksMut<'data, T> { + fn drive(self, 
consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + div_round_up(self.slice.len(), self.chunk_size) + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(ChunksMutProducer { + chunk_size: self.chunk_size, + slice: self.slice, + }) + } +} + +struct ChunksMutProducer<'data, T: Send> { + chunk_size: usize, + slice: &'data mut [T], +} + +impl<'data, T: 'data + Send> Producer for ChunksMutProducer<'data, T> { + type Item = &'data mut [T]; + type IntoIter = ::std::slice::ChunksMut<'data, T>; + + fn into_iter(self) -> Self::IntoIter { + self.slice.chunks_mut(self.chunk_size) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = cmp::min(index * self.chunk_size, self.slice.len()); + let (left, right) = self.slice.split_at_mut(elem_index); + ( + ChunksMutProducer { + chunk_size: self.chunk_size, + slice: left, + }, + ChunksMutProducer { + chunk_size: self.chunk_size, + slice: right, + }, + ) + } +} + +/// Parallel iterator over mutable non-overlapping chunks of a slice +#[derive(Debug)] +pub struct ChunksExactMut<'data, T: Send> { + chunk_size: usize, + slice: &'data mut [T], + rem: &'data mut [T], +} + +impl<'data, T: Send> ChunksExactMut<'data, T> { + /// Return the remainder of the original slice that is not going to be + /// returned by the iterator. The returned slice has at most `chunk_size-1` + /// elements. + /// + /// Note that this has to consume `self` to return the original lifetime of + /// the data, which prevents this from actually being used as a parallel + /// iterator since that also consumes. This method is provided for parity + /// with `std::iter::ChunksExactMut`, but consider calling `remainder()` or + /// `take_remainder()` as alternatives. + pub fn into_remainder(self) -> &'data mut [T] { + self.rem + } + + /// Return the remainder of the original slice that is not going to be + /// returned by the iterator. 
The returned slice has at most `chunk_size-1` + /// elements. + /// + /// Consider `take_remainder()` if you need access to the data with its + /// original lifetime, rather than borrowing through `&mut self` here. + pub fn remainder(&mut self) -> &mut [T] { + self.rem + } + + /// Return the remainder of the original slice that is not going to be + /// returned by the iterator. The returned slice has at most `chunk_size-1` + /// elements. Subsequent calls will return an empty slice. + pub fn take_remainder(&mut self) -> &'data mut [T] { + std::mem::replace(&mut self.rem, &mut []) + } +} + +impl<'data, T: Send + 'data> ParallelIterator for ChunksExactMut<'data, T> { + type Item = &'data mut [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Send + 'data> IndexedParallelIterator for ChunksExactMut<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.slice.len() / self.chunk_size + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + callback.callback(ChunksExactMutProducer { + chunk_size: self.chunk_size, + slice: self.slice, + }) + } +} + +struct ChunksExactMutProducer<'data, T: Send> { + chunk_size: usize, + slice: &'data mut [T], +} + +impl<'data, T: 'data + Send> Producer for ChunksExactMutProducer<'data, T> { + type Item = &'data mut [T]; + type IntoIter = ::std::slice::ChunksExactMut<'data, T>; + + fn into_iter(self) -> Self::IntoIter { + self.slice.chunks_exact_mut(self.chunk_size) + } + + fn split_at(self, index: usize) -> (Self, Self) { + let elem_index = index * self.chunk_size; + let (left, right) = self.slice.split_at_mut(elem_index); + ( + ChunksExactMutProducer { + chunk_size: self.chunk_size, + slice: left, + }, + ChunksExactMutProducer { + chunk_size: self.chunk_size, 
+ slice: right, + }, + ) + } +} + +/// Parallel iterator over slices separated by a predicate +pub struct Split<'data, T, P> { + slice: &'data [T], + separator: P, +} + +impl<'data, T, P: Clone> Clone for Split<'data, T, P> { + fn clone(&self) -> Self { + Split { + separator: self.separator.clone(), + ..*self + } + } +} + +impl<'data, T: Debug, P> Debug for Split<'data, T, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Split").field("slice", &self.slice).finish() + } +} + +impl<'data, T, P> ParallelIterator for Split<'data, T, P> +where + P: Fn(&T) -> bool + Sync + Send, + T: Sync, +{ + type Item = &'data [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitProducer::new(self.slice, &self.separator); + bridge_unindexed(producer, consumer) + } +} + +/// Implement support for `SplitProducer`. +impl<'data, T, P> Fissile

for &'data [T] +where + P: Fn(&T) -> bool, +{ + fn length(&self) -> usize { + self.len() + } + + fn midpoint(&self, end: usize) -> usize { + end / 2 + } + + fn find(&self, separator: &P, start: usize, end: usize) -> Option { + self[start..end].iter().position(separator) + } + + fn rfind(&self, separator: &P, end: usize) -> Option { + self[..end].iter().rposition(separator) + } + + fn split_once(self, index: usize) -> (Self, Self) { + let (left, right) = self.split_at(index); + (left, &right[1..]) // skip the separator + } + + fn fold_splits(self, separator: &P, folder: F, skip_last: bool) -> F + where + F: Folder, + Self: Send, + { + let mut split = self.split(separator); + if skip_last { + split.next_back(); + } + folder.consume_iter(split) + } +} + +/// Parallel iterator over mutable slices separated by a predicate +pub struct SplitMut<'data, T, P> { + slice: &'data mut [T], + separator: P, +} + +impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitMut") + .field("slice", &self.slice) + .finish() + } +} + +impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P> +where + P: Fn(&T) -> bool + Sync + Send, + T: Send, +{ + type Item = &'data mut [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitProducer::new(self.slice, &self.separator); + bridge_unindexed(producer, consumer) + } +} + +/// Implement support for `SplitProducer`. +impl<'data, T, P> Fissile

for &'data mut [T] +where + P: Fn(&T) -> bool, +{ + fn length(&self) -> usize { + self.len() + } + + fn midpoint(&self, end: usize) -> usize { + end / 2 + } + + fn find(&self, separator: &P, start: usize, end: usize) -> Option { + self[start..end].iter().position(separator) + } + + fn rfind(&self, separator: &P, end: usize) -> Option { + self[..end].iter().rposition(separator) + } + + fn split_once(self, index: usize) -> (Self, Self) { + let (left, right) = self.split_at_mut(index); + (left, &mut right[1..]) // skip the separator + } + + fn fold_splits(self, separator: &P, folder: F, skip_last: bool) -> F + where + F: Folder, + Self: Send, + { + let mut split = self.split_mut(separator); + if skip_last { + split.next_back(); + } + folder.consume_iter(split) + } +} diff --git a/src/slice/quicksort.rs b/src/slice/quicksort.rs new file mode 100644 index 0000000..b985073 --- /dev/null +++ b/src/slice/quicksort.rs @@ -0,0 +1,800 @@ +//! Parallel quicksort. +//! +//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized. +//! The only difference from the original is that calls to `recurse` are executed in parallel using +//! `rayon_core::join`. + +use std::cmp; +use std::mem; +use std::ptr; + +/// When dropped, takes the value out of `Option` and writes it into `dest`. +/// +/// This allows us to safely read the pivot into a stack-allocated variable for efficiency, and +/// write it back into the slice after partitioning. This way we ensure that the write happens +/// even if `is_less` panics in the meantime. +struct WriteOnDrop { + value: Option, + dest: *mut T, +} + +impl Drop for WriteOnDrop { + fn drop(&mut self) { + unsafe { + ptr::write(self.dest, self.value.take().unwrap()); + } + } +} + +/// Holds a value, but never drops it. +struct NoDrop { + value: Option, +} + +impl Drop for NoDrop { + fn drop(&mut self) { + mem::forget(self.value.take()); + } +} + +/// When dropped, copies from `src` into `dest`. 
+struct CopyOnDrop { + src: *mut T, + dest: *mut T, +} + +impl Drop for CopyOnDrop { + fn drop(&mut self) { + unsafe { + ptr::copy_nonoverlapping(self.src, self.dest, 1); + } + } +} + +/// Shifts the first element to the right until it encounters a greater or equal element. +fn shift_head(v: &mut [T], is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + let len = v.len(); + unsafe { + // If the first two elements are out-of-order... + if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) { + // Read the first element into a stack-allocated variable. If a following comparison + // operation panics, `hole` will get dropped and automatically write the element back + // into the slice. + let mut tmp = NoDrop { + value: Some(ptr::read(v.get_unchecked(0))), + }; + let mut hole = CopyOnDrop { + src: tmp.value.as_mut().unwrap(), + dest: v.get_unchecked_mut(1), + }; + ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1); + + for i in 2..len { + if !is_less(v.get_unchecked(i), tmp.value.as_ref().unwrap()) { + break; + } + + // Move `i`-th element one place to the left, thus shifting the hole to the right. + ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1); + hole.dest = v.get_unchecked_mut(i); + } + // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. + } + } +} + +/// Shifts the last element to the left until it encounters a smaller or equal element. +fn shift_tail(v: &mut [T], is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + let len = v.len(); + unsafe { + // If the last two elements are out-of-order... + if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) { + // Read the last element into a stack-allocated variable. If a following comparison + // operation panics, `hole` will get dropped and automatically write the element back + // into the slice. 
+ let mut tmp = NoDrop { + value: Some(ptr::read(v.get_unchecked(len - 1))), + }; + let mut hole = CopyOnDrop { + src: tmp.value.as_mut().unwrap(), + dest: v.get_unchecked_mut(len - 2), + }; + ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1); + + for i in (0..len - 2).rev() { + if !is_less(&tmp.value.as_ref().unwrap(), v.get_unchecked(i)) { + break; + } + + // Move `i`-th element one place to the right, thus shifting the hole to the left. + ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1); + hole.dest = v.get_unchecked_mut(i); + } + // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`. + } + } +} + +/// Partially sorts a slice by shifting several out-of-order elements around. +/// +/// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case. +#[cold] +fn partial_insertion_sort(v: &mut [T], is_less: &F) -> bool +where + F: Fn(&T, &T) -> bool, +{ + // Maximum number of adjacent out-of-order pairs that will get shifted. + const MAX_STEPS: usize = 5; + // If the slice is shorter than this, don't shift any elements. + const SHORTEST_SHIFTING: usize = 50; + + let len = v.len(); + let mut i = 1; + + for _ in 0..MAX_STEPS { + unsafe { + // Find the next pair of adjacent out-of-order elements. + while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) { + i += 1; + } + } + + // Are we done? + if i == len { + return true; + } + + // Don't shift elements on short arrays, that has a performance cost. + if len < SHORTEST_SHIFTING { + return false; + } + + // Swap the found pair of elements. This puts them in correct order. + v.swap(i - 1, i); + + // Shift the smaller element to the left. + shift_tail(&mut v[..i], is_less); + // Shift the greater element to the right. + shift_head(&mut v[i..], is_less); + } + + // Didn't manage to sort the slice in the limited number of steps. 
+ false +} + +/// Sorts a slice using insertion sort, which is `O(n^2)` worst-case. +fn insertion_sort(v: &mut [T], is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + for i in 1..v.len() { + shift_tail(&mut v[..=i], is_less); + } +} + +/// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case. +#[cold] +fn heapsort(v: &mut [T], is_less: &F) +where + F: Fn(&T, &T) -> bool, +{ + // This binary heap respects the invariant `parent >= child`. + let sift_down = |v: &mut [T], mut node| { + loop { + // Children of `node`: + let left = 2 * node + 1; + let right = 2 * node + 2; + + // Choose the greater child. + let greater = if right < v.len() && is_less(&v[left], &v[right]) { + right + } else { + left + }; + + // Stop if the invariant holds at `node`. + if greater >= v.len() || !is_less(&v[node], &v[greater]) { + break; + } + + // Swap `node` with the greater child, move one step down, and continue sifting. + v.swap(node, greater); + node = greater; + } + }; + + // Build the heap in linear time. + for i in (0..v.len() / 2).rev() { + sift_down(v, i); + } + + // Pop maximal elements from the heap. + for i in (1..v.len()).rev() { + v.swap(0, i); + sift_down(&mut v[..i], 0); + } +} + +/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal +/// to `pivot`. +/// +/// Returns the number of elements smaller than `pivot`. +/// +/// Partitioning is performed block-by-block in order to minimize the cost of branching operations. +/// This idea is presented in the [BlockQuicksort][pdf] paper. +/// +/// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf +fn partition_in_blocks(v: &mut [T], pivot: &T, is_less: &F) -> usize +where + F: Fn(&T, &T) -> bool, +{ + // Number of elements in a typical block. + const BLOCK: usize = 128; + + // The partitioning algorithm repeats the following steps until completion: + // + // 1. 
Trace a block from the left side to identify elements greater than or equal to the pivot. + // 2. Trace a block from the right side to identify elements smaller than the pivot. + // 3. Exchange the identified elements between the left and right side. + // + // We keep the following variables for a block of elements: + // + // 1. `block` - Number of elements in the block. + // 2. `start` - Start pointer into the `offsets` array. + // 3. `end` - End pointer into the `offsets` array. + // 4. `offsets - Indices of out-of-order elements within the block. + + // The current block on the left side (from `l` to `l.offset(block_l)`). + let mut l = v.as_mut_ptr(); + let mut block_l = BLOCK; + let mut start_l = ptr::null_mut(); + let mut end_l = ptr::null_mut(); + let mut offsets_l = [0u8; BLOCK]; + + // The current block on the right side (from `r.offset(-block_r)` to `r`). + let mut r = unsafe { l.add(v.len()) }; + let mut block_r = BLOCK; + let mut start_r = ptr::null_mut(); + let mut end_r = ptr::null_mut(); + let mut offsets_r = [0u8; BLOCK]; + + // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive). + fn width(l: *mut T, r: *mut T) -> usize { + assert!(mem::size_of::() > 0); + (r as usize - l as usize) / mem::size_of::() + } + + loop { + // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do + // some patch-up work in order to partition the remaining elements in between. + let is_done = width(l, r) <= 2 * BLOCK; + + if is_done { + // Number of remaining elements (still not compared to the pivot). + let mut rem = width(l, r); + if start_l < end_l || start_r < end_r { + rem -= BLOCK; + } + + // Adjust block sizes so that the left and right block don't overlap, but get perfectly + // aligned to cover the whole remaining gap. 
+ if start_l < end_l { + block_r = rem; + } else if start_r < end_r { + block_l = rem; + } else { + block_l = rem / 2; + block_r = rem - block_l; + } + debug_assert!(block_l <= BLOCK && block_r <= BLOCK); + debug_assert!(width(l, r) == block_l + block_r); + } + + if start_l == end_l { + // Trace `block_l` elements from the left side. + start_l = offsets_l.as_mut_ptr(); + end_l = offsets_l.as_mut_ptr(); + let mut elem = l; + + for i in 0..block_l { + unsafe { + // Branchless comparison. + *end_l = i as u8; + end_l = end_l.offset(!is_less(&*elem, pivot) as isize); + elem = elem.offset(1); + } + } + } + + if start_r == end_r { + // Trace `block_r` elements from the right side. + start_r = offsets_r.as_mut_ptr(); + end_r = offsets_r.as_mut_ptr(); + let mut elem = r; + + for i in 0..block_r { + unsafe { + // Branchless comparison. + elem = elem.offset(-1); + *end_r = i as u8; + end_r = end_r.offset(is_less(&*elem, pivot) as isize); + } + } + } + + // Number of out-of-order elements to swap between the left and right side. + let count = cmp::min(width(start_l, end_l), width(start_r, end_r)); + + if count > 0 { + macro_rules! left { + () => { + l.offset(*start_l as isize) + }; + } + macro_rules! right { + () => { + r.offset(-(*start_r as isize) - 1) + }; + } + + // Instead of swapping one pair at the time, it is more efficient to perform a cyclic + // permutation. This is not strictly equivalent to swapping, but produces a similar + // result using fewer memory operations. + unsafe { + let tmp = ptr::read(left!()); + ptr::copy_nonoverlapping(right!(), left!(), 1); + + for _ in 1..count { + start_l = start_l.offset(1); + ptr::copy_nonoverlapping(left!(), right!(), 1); + start_r = start_r.offset(1); + ptr::copy_nonoverlapping(right!(), left!(), 1); + } + + ptr::copy_nonoverlapping(&tmp, right!(), 1); + mem::forget(tmp); + start_l = start_l.offset(1); + start_r = start_r.offset(1); + } + } + + if start_l == end_l { + // All out-of-order elements in the left block were moved. 
Move to the next block. + l = unsafe { l.add(block_l) }; + } + + if start_r == end_r { + // All out-of-order elements in the right block were moved. Move to the previous block. + r = unsafe { r.sub(block_r) }; + } + + if is_done { + break; + } + } + + // All that remains now is at most one block (either the left or the right) with out-of-order + // elements that need to be moved. Such remaining elements can be simply shifted to the end + // within their block. + + if start_l < end_l { + // The left block remains. + // Move it's remaining out-of-order elements to the far right. + debug_assert_eq!(width(l, r), block_l); + while start_l < end_l { + unsafe { + end_l = end_l.offset(-1); + ptr::swap(l.offset(*end_l as isize), r.offset(-1)); + r = r.offset(-1); + } + } + width(v.as_mut_ptr(), r) + } else if start_r < end_r { + // The right block remains. + // Move it's remaining out-of-order elements to the far left. + debug_assert_eq!(width(l, r), block_r); + while start_r < end_r { + unsafe { + end_r = end_r.offset(-1); + ptr::swap(l, r.offset(-(*end_r as isize) - 1)); + l = l.offset(1); + } + } + width(v.as_mut_ptr(), l) + } else { + // Nothing else to do, we're done. + width(v.as_mut_ptr(), l) + } +} + +/// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or +/// equal to `v[pivot]`. +/// +/// Returns a tuple of: +/// +/// 1. Number of elements smaller than `v[pivot]`. +/// 2. True if `v` was already partitioned. +fn partition(v: &mut [T], pivot: usize, is_less: &F) -> (usize, bool) +where + F: Fn(&T, &T) -> bool, +{ + let (mid, was_partitioned) = { + // Place the pivot at the beginning of slice. + v.swap(0, pivot); + let (pivot, v) = v.split_at_mut(1); + let pivot = &mut pivot[0]; + + // Read the pivot into a stack-allocated variable for efficiency. If a following comparison + // operation panics, the pivot will be automatically written back into the slice. 
+ let write_on_drop = WriteOnDrop { + value: unsafe { Some(ptr::read(pivot)) }, + dest: pivot, + }; + let pivot = write_on_drop.value.as_ref().unwrap(); + + // Find the first pair of out-of-order elements. + let mut l = 0; + let mut r = v.len(); + unsafe { + // Find the first element greater then or equal to the pivot. + while l < r && is_less(v.get_unchecked(l), pivot) { + l += 1; + } + + // Find the last element smaller that the pivot. + while l < r && !is_less(v.get_unchecked(r - 1), pivot) { + r -= 1; + } + } + + ( + l + partition_in_blocks(&mut v[l..r], pivot, is_less), + l >= r, + ) + + // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated + // variable) back into the slice where it originally was. This step is critical in ensuring + // safety! + }; + + // Place the pivot between the two partitions. + v.swap(0, mid); + + (mid, was_partitioned) +} + +/// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`. +/// +/// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain +/// elements smaller than the pivot. +fn partition_equal(v: &mut [T], pivot: usize, is_less: &F) -> usize +where + F: Fn(&T, &T) -> bool, +{ + // Place the pivot at the beginning of slice. + v.swap(0, pivot); + let (pivot, v) = v.split_at_mut(1); + let pivot = &mut pivot[0]; + + // Read the pivot into a stack-allocated variable for efficiency. If a following comparison + // operation panics, the pivot will be automatically written back into the slice. + let write_on_drop = WriteOnDrop { + value: unsafe { Some(ptr::read(pivot)) }, + dest: pivot, + }; + let pivot = write_on_drop.value.as_ref().unwrap(); + + // Now partition the slice. + let mut l = 0; + let mut r = v.len(); + loop { + unsafe { + // Find the first element greater that the pivot. + while l < r && !is_less(pivot, v.get_unchecked(l)) { + l += 1; + } + + // Find the last element equal to the pivot. 
+ while l < r && is_less(pivot, v.get_unchecked(r - 1)) { + r -= 1; + } + + // Are we done? + if l >= r { + break; + } + + // Swap the found pair of out-of-order elements. + r -= 1; + ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r)); + l += 1; + } + } + + // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself. + l + 1 + + // `write_on_drop` goes out of scope and writes the pivot (which is a stack-allocated variable) + // back into the slice where it originally was. This step is critical in ensuring safety! +} + +/// Scatters some elements around in an attempt to break patterns that might cause imbalanced +/// partitions in quicksort. +#[cold] +fn break_patterns(v: &mut [T]) { + let len = v.len(); + if len >= 8 { + // Pseudorandom number generator from the "Xorshift RNGs" paper by George Marsaglia. + let mut random = len as u32; + let mut gen_u32 = || { + random ^= random << 13; + random ^= random >> 17; + random ^= random << 5; + random + }; + let mut gen_usize = || { + if mem::size_of::() <= 4 { + gen_u32() as usize + } else { + ((u64::from(gen_u32()) << 32) | u64::from(gen_u32())) as usize + } + }; + + // Take random numbers modulo this number. + // The number fits into `usize` because `len` is not greater than `isize::MAX`. + let modulus = len.next_power_of_two(); + + // Some pivot candidates will be in the nearby of this index. Let's randomize them. + let pos = len / 4 * 2; + + for i in 0..3 { + // Generate a random number modulo `len`. However, in order to avoid costly operations + // we first take it modulo a power of two, and then decrease by `len` until it fits + // into the range `[0, len - 1]`. + let mut other = gen_usize() & (modulus - 1); + + // `other` is guaranteed to be less than `2 * len`. + if other >= len { + other -= len; + } + + v.swap(pos - 1 + i, other); + } + } +} + +/// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted. 
+/// +/// Elements in `v` might be reordered in the process. +fn choose_pivot(v: &mut [T], is_less: &F) -> (usize, bool) +where + F: Fn(&T, &T) -> bool, +{ + // Minimum length to choose the median-of-medians method. + // Shorter slices use the simple median-of-three method. + const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50; + // Maximum number of swaps that can be performed in this function. + const MAX_SWAPS: usize = 4 * 3; + + let len = v.len(); + + // Three indices near which we are going to choose a pivot. + let mut a = len / 4 * 1; + let mut b = len / 4 * 2; + let mut c = len / 4 * 3; + + // Counts the total number of swaps we are about to perform while sorting indices. + let mut swaps = 0; + + if len >= 8 { + // Swaps indices so that `v[a] <= v[b]`. + let mut sort2 = |a: &mut usize, b: &mut usize| unsafe { + if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) { + ptr::swap(a, b); + swaps += 1; + } + }; + + // Swaps indices so that `v[a] <= v[b] <= v[c]`. + let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| { + sort2(a, b); + sort2(b, c); + sort2(a, b); + }; + + if len >= SHORTEST_MEDIAN_OF_MEDIANS { + // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`. + let mut sort_adjacent = |a: &mut usize| { + let tmp = *a; + sort3(&mut (tmp - 1), a, &mut (tmp + 1)); + }; + + // Find medians in the neighborhoods of `a`, `b`, and `c`. + sort_adjacent(&mut a); + sort_adjacent(&mut b); + sort_adjacent(&mut c); + } + + // Find the median among `a`, `b`, and `c`. + sort3(&mut a, &mut b, &mut c); + } + + if swaps < MAX_SWAPS { + (b, swaps == 0) + } else { + // The maximum number of swaps was performed. Chances are the slice is descending or mostly + // descending, so reversing will probably help sort it faster. + v.reverse(); + (len - 1 - b, true) + } +} + +/// Sorts `v` recursively. +/// +/// If the slice had a predecessor in the original array, it is specified as `pred`. 
+/// +/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero, +/// this function will immediately switch to heapsort. +fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &F, mut pred: Option<&'a mut T>, mut limit: usize) +where + T: Send, + F: Fn(&T, &T) -> bool + Sync, +{ + // Slices of up to this length get sorted using insertion sort. + const MAX_INSERTION: usize = 20; + // If both partitions are up to this length, we continue sequentially. This number is as small + // as possible but so that the overhead of Rayon's task scheduling is still negligible. + const MAX_SEQUENTIAL: usize = 2000; + + // True if the last partitioning was reasonably balanced. + let mut was_balanced = true; + // True if the last partitioning didn't shuffle elements (the slice was already partitioned). + let mut was_partitioned = true; + + loop { + let len = v.len(); + + // Very short slices get sorted using insertion sort. + if len <= MAX_INSERTION { + insertion_sort(v, is_less); + return; + } + + // If too many bad pivot choices were made, simply fall back to heapsort in order to + // guarantee `O(n log n)` worst-case. + if limit == 0 { + heapsort(v, is_less); + return; + } + + // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling + // some elements around. Hopefully we'll choose a better pivot this time. + if !was_balanced { + break_patterns(v); + limit -= 1; + } + + // Choose a pivot and try guessing whether the slice is already sorted. + let (pivot, likely_sorted) = choose_pivot(v, is_less); + + // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot + // selection predicts the slice is likely already sorted... + if was_balanced && was_partitioned && likely_sorted { + // Try identifying several out-of-order elements and shifting them to correct + // positions. If the slice ends up being completely sorted, we're done. 
+ if partial_insertion_sort(v, is_less) { + return; + } + } + + // If the chosen pivot is equal to the predecessor, then it's the smallest element in the + // slice. Partition the slice into elements equal to and elements greater than the pivot. + // This case is usually hit when the slice contains many duplicate elements. + if let Some(ref p) = pred { + if !is_less(p, &v[pivot]) { + let mid = partition_equal(v, pivot, is_less); + + // Continue sorting elements greater than the pivot. + v = &mut { v }[mid..]; + continue; + } + } + + // Partition the slice. + let (mid, was_p) = partition(v, pivot, is_less); + was_balanced = cmp::min(mid, len - mid) >= len / 8; + was_partitioned = was_p; + + // Split the slice into `left`, `pivot`, and `right`. + let (left, right) = { v }.split_at_mut(mid); + let (pivot, right) = right.split_at_mut(1); + let pivot = &mut pivot[0]; + + if cmp::max(left.len(), right.len()) <= MAX_SEQUENTIAL { + // Recurse into the shorter side only in order to minimize the total number of recursive + // calls and consume less stack space. Then just continue with the longer side (this is + // akin to tail recursion). + if left.len() < right.len() { + recurse(left, is_less, pred, limit); + v = right; + pred = Some(pivot); + } else { + recurse(right, is_less, Some(pivot), limit); + v = left; + } + } else { + // Sort the left and right half in parallel. + rayon_core::join( + || recurse(left, is_less, pred, limit), + || recurse(right, is_less, Some(pivot), limit), + ); + break; + } + } +} + +/// Sorts `v` using pattern-defeating quicksort in parallel. +/// +/// The algorithm is unstable, in-place, and `O(n log n)` worst-case. +pub(super) fn par_quicksort(v: &mut [T], is_less: F) +where + T: Send, + F: Fn(&T, &T) -> bool + Sync, +{ + // Sorting has no meaningful behavior on zero-sized types. + if mem::size_of::() == 0 { + return; + } + + // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`. 
+ let limit = mem::size_of::() * 8 - v.len().leading_zeros() as usize; + + recurse(v, &is_less, None, limit); +} + +#[cfg(test)] +mod tests { + use super::heapsort; + use rand::distributions::Uniform; + use rand::{thread_rng, Rng}; + + #[test] + fn test_heapsort() { + let rng = thread_rng(); + + for len in (0..25).chain(500..501) { + for &modulus in &[5, 10, 100] { + let dist = Uniform::new(0, modulus); + for _ in 0..100 { + let v: Vec = rng.sample_iter(&dist).take(len).collect(); + + // Test heapsort using `<` operator. + let mut tmp = v.clone(); + heapsort(&mut tmp, &|a, b| a < b); + assert!(tmp.windows(2).all(|w| w[0] <= w[1])); + + // Test heapsort using `>` operator. + let mut tmp = v.clone(); + heapsort(&mut tmp, &|a, b| a > b); + assert!(tmp.windows(2).all(|w| w[0] >= w[1])); + } + } + } + + // Sort using a completely random comparison function. + // This will reorder the elements *somehow*, but won't panic. + let mut v: Vec<_> = (0..100).collect(); + heapsort(&mut v, &|_, _| thread_rng().gen()); + heapsort(&mut v, &|a, b| a < b); + + for i in 0..v.len() { + assert_eq!(v[i], i); + } + } +} diff --git a/src/slice/test.rs b/src/slice/test.rs new file mode 100644 index 0000000..97de7d8 --- /dev/null +++ b/src/slice/test.rs @@ -0,0 +1,148 @@ +#![cfg(test)] + +use crate::prelude::*; +use rand::distributions::Uniform; +use rand::seq::SliceRandom; +use rand::{thread_rng, Rng}; +use std::cmp::Ordering::{Equal, Greater, Less}; + +macro_rules! sort { + ($f:ident, $name:ident) => { + #[test] + fn $name() { + let mut rng = thread_rng(); + + for len in (0..25).chain(500..501) { + for &modulus in &[5, 10, 100] { + let dist = Uniform::new(0, modulus); + for _ in 0..100 { + let v: Vec = rng.sample_iter(&dist).take(len).collect(); + + // Test sort using `<` operator. + let mut tmp = v.clone(); + tmp.$f(|a, b| a.cmp(b)); + assert!(tmp.windows(2).all(|w| w[0] <= w[1])); + + // Test sort using `>` operator. 
+ let mut tmp = v.clone(); + tmp.$f(|a, b| b.cmp(a)); + assert!(tmp.windows(2).all(|w| w[0] >= w[1])); + } + } + } + + // Test sort with many duplicates. + for &len in &[1_000, 10_000, 100_000] { + for &modulus in &[5, 10, 100, 10_000] { + let dist = Uniform::new(0, modulus); + let mut v: Vec = rng.sample_iter(&dist).take(len).collect(); + + v.$f(|a, b| a.cmp(b)); + assert!(v.windows(2).all(|w| w[0] <= w[1])); + } + } + + // Test sort with many pre-sorted runs. + for &len in &[1_000, 10_000, 100_000] { + let len_dist = Uniform::new(0, len); + for &modulus in &[5, 10, 1000, 50_000] { + let dist = Uniform::new(0, modulus); + let mut v: Vec = rng.sample_iter(&dist).take(len).collect(); + + v.sort(); + v.reverse(); + + for _ in 0..5 { + let a = rng.sample(&len_dist); + let b = rng.sample(&len_dist); + if a < b { + v[a..b].reverse(); + } else { + v.swap(a, b); + } + } + + v.$f(|a, b| a.cmp(b)); + assert!(v.windows(2).all(|w| w[0] <= w[1])); + } + } + + // Sort using a completely random comparison function. + // This will reorder the elements *somehow*, but won't panic. + let mut v: Vec<_> = (0..100).collect(); + v.$f(|_, _| *[Less, Equal, Greater].choose(&mut thread_rng()).unwrap()); + v.$f(|a, b| a.cmp(b)); + for i in 0..v.len() { + assert_eq!(v[i], i); + } + + // Should not panic. + [0i32; 0].$f(|a, b| a.cmp(b)); + [(); 10].$f(|a, b| a.cmp(b)); + [(); 100].$f(|a, b| a.cmp(b)); + + let mut v = [0xDEAD_BEEFu64]; + v.$f(|a, b| a.cmp(b)); + assert!(v == [0xDEAD_BEEF]); + } + }; +} + +sort!(par_sort_by, test_par_sort); +sort!(par_sort_unstable_by, test_par_sort_unstable); + +#[test] +fn test_par_sort_stability() { + for len in (2..25).chain(500..510).chain(50_000..50_010) { + for _ in 0..10 { + let mut counts = [0; 10]; + + // Create a vector like [(6, 1), (5, 1), (6, 2), ...], + // where the first item of each tuple is random, but + // the second item represents which occurrence of that + // number this element is, i.e. the second elements + // will occur in sorted order. 
+ let mut rng = thread_rng(); + let mut v: Vec<_> = (0..len) + .map(|_| { + let n: usize = rng.gen_range(0, 10); + counts[n] += 1; + (n, counts[n]) + }) + .collect(); + + // Only sort on the first element, so an unstable sort + // may mix up the counts. + v.par_sort_by(|&(a, _), &(b, _)| a.cmp(&b)); + + // This comparison includes the count (the second item + // of the tuple), so elements with equal first items + // will need to be ordered with increasing + // counts... i.e. exactly asserting that this sort is + // stable. + assert!(v.windows(2).all(|w| w[0] <= w[1])); + } + } +} + +#[test] +fn test_par_chunks_exact_remainder() { + let v: &[i32] = &[0, 1, 2, 3, 4]; + let c = v.par_chunks_exact(2); + assert_eq!(c.remainder(), &[4]); + assert_eq!(c.len(), 2); +} + +#[test] +fn test_par_chunks_exact_mut_remainder() { + let v: &mut [i32] = &mut [0, 1, 2, 3, 4]; + let mut c = v.par_chunks_exact_mut(2); + assert_eq!(c.remainder(), &[4]); + assert_eq!(c.len(), 2); + assert_eq!(c.into_remainder(), &[4]); + + let mut c = v.par_chunks_exact_mut(2); + assert_eq!(c.take_remainder(), &[4]); + assert_eq!(c.take_remainder(), &[]); + assert_eq!(c.len(), 2); +} diff --git a/src/split_producer.rs b/src/split_producer.rs new file mode 100644 index 0000000..568657a --- /dev/null +++ b/src/split_producer.rs @@ -0,0 +1,132 @@ +//! Common splitter for strings and slices +//! +//! This module is private, so these items are effectively `pub(super)` + +use crate::iter::plumbing::{Folder, UnindexedProducer}; + +/// Common producer for splitting on a predicate. +pub(super) struct SplitProducer<'p, P, V> { + data: V, + separator: &'p P, + + /// Marks the endpoint beyond which we've already found no separators. + tail: usize, +} + +/// Helper trait so `&str`, `&[T]`, and `&mut [T]` can share `SplitProducer`. +pub(super) trait Fissile

: Sized { + fn length(&self) -> usize; + fn midpoint(&self, end: usize) -> usize; + fn find(&self, separator: &P, start: usize, end: usize) -> Option; + fn rfind(&self, separator: &P, end: usize) -> Option; + fn split_once(self, index: usize) -> (Self, Self); + fn fold_splits(self, separator: &P, folder: F, skip_last: bool) -> F + where + F: Folder, + Self: Send; +} + +impl<'p, P, V> SplitProducer<'p, P, V> +where + V: Fissile

+ Send, +{ + pub(super) fn new(data: V, separator: &'p P) -> Self { + SplitProducer { + tail: data.length(), + data, + separator, + } + } + + /// Common `fold_with` implementation, integrating `SplitTerminator`'s + /// need to sometimes skip its final empty item. + pub(super) fn fold_with(self, folder: F, skip_last: bool) -> F + where + F: Folder, + { + let SplitProducer { + data, + separator, + tail, + } = self; + + if tail == data.length() { + // No tail section, so just let `fold_splits` handle it. + data.fold_splits(separator, folder, skip_last) + } else if let Some(index) = data.rfind(separator, tail) { + // We found the last separator to complete the tail, so + // end with that slice after `fold_splits` finds the rest. + let (left, right) = data.split_once(index); + let folder = left.fold_splits(separator, folder, false); + if skip_last || folder.full() { + folder + } else { + folder.consume(right) + } + } else { + // We know there are no separators at all. Return our whole data. + if skip_last { + folder + } else { + folder.consume(data) + } + } + } +} + +impl<'p, P, V> UnindexedProducer for SplitProducer<'p, P, V> +where + V: Fissile

+ Send, + P: Sync, +{ + type Item = V; + + fn split(self) -> (Self, Option) { + // Look forward for the separator, and failing that look backward. + let mid = self.data.midpoint(self.tail); + let index = match self.data.find(self.separator, mid, self.tail) { + Some(i) => Some(mid + i), + None => self.data.rfind(self.separator, mid), + }; + + if let Some(index) = index { + let len = self.data.length(); + let (left, right) = self.data.split_once(index); + + let (left_tail, right_tail) = if index < mid { + // If we scanned backwards to find the separator, everything in + // the right side is exhausted, with no separators left to find. + (index, 0) + } else { + let right_index = len - right.length(); + (mid, self.tail - right_index) + }; + + // Create the left split before the separator. + let left = SplitProducer { + data: left, + tail: left_tail, + ..self + }; + + // Create the right split following the separator. + let right = SplitProducer { + data: right, + tail: right_tail, + ..self + }; + + (left, Some(right)) + } else { + // The search is exhausted, no more separators... + (SplitProducer { tail: 0, ..self }, None) + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.fold_with(folder, false) + } +} diff --git a/src/str.rs b/src/str.rs new file mode 100644 index 0000000..2fdaaa7 --- /dev/null +++ b/src/str.rs @@ -0,0 +1,874 @@ +//! Parallel iterator types for [strings][std::str] +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`] +//! reference a `Pattern` trait which is not visible outside this crate. +//! This trait is intentionally kept private, for use only by Rayon itself. +//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. +//! +//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split +//! 
[`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator +//! +//! [std::str]: https://doc.rust-lang.org/stable/std/str/ + +use crate::iter::plumbing::*; +use crate::iter::*; +use crate::split_producer::*; + +/// Test if a byte is the start of a UTF-8 character. +/// (extracted from `str::is_char_boundary`) +#[inline] +fn is_char_boundary(b: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (b as i8) >= -0x40 +} + +/// Find the index of a character boundary near the midpoint. +#[inline] +fn find_char_midpoint(chars: &str) -> usize { + let mid = chars.len() / 2; + + // We want to split near the midpoint, but we need to find an actual + // character boundary. So we look at the raw bytes, first scanning + // forward from the midpoint for a boundary, then trying backward. + let (left, right) = chars.as_bytes().split_at(mid); + match right.iter().cloned().position(is_char_boundary) { + Some(i) => mid + i, + None => left + .iter() + .cloned() + .rposition(is_char_boundary) + .unwrap_or(0), + } +} + +/// Try to split a string near the midpoint. +#[inline] +fn split(chars: &str) -> Option<(&str, &str)> { + let index = find_char_midpoint(chars); + if index > 0 { + Some(chars.split_at(index)) + } else { + None + } +} + +/// Parallel extensions for strings. +pub trait ParallelString { + /// Returns a plain string slice, which is used to implement the rest of + /// the parallel methods. + fn as_parallel_string(&self) -> &str; + + /// Returns a parallel iterator over the characters of a string. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let max = "hello".par_chars().max_by_key(|c| *c as i32); + /// assert_eq!(Some('o'), max); + /// ``` + fn par_chars(&self) -> Chars<'_> { + Chars { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over the characters of a string, with their positions. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32); + /// assert_eq!(Some((1, 'e')), min); + /// ``` + fn par_char_indices(&self) -> CharIndices<'_> { + CharIndices { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over the bytes of a string. + /// + /// Note that multi-byte sequences (for code points greater than `U+007F`) + /// are produced as separate items, but will not be split across threads. + /// If you would prefer an indexed iterator without that guarantee, consider + /// `string.as_bytes().par_iter().cloned()` instead. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let max = "hello".par_bytes().max(); + /// assert_eq!(Some(b'o'), max); + /// ``` + fn par_bytes(&self) -> Bytes<'_> { + Bytes { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over a string encoded as UTF-16. + /// + /// Note that surrogate pairs (for code points greater than `U+FFFF`) are + /// produced as separate items, but will not be split across threads. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// + /// let max = "hello".par_encode_utf16().max(); + /// assert_eq!(Some(b'o' as u16), max); + /// + /// let text = "Zażółć gęślą jaźń"; + /// let utf8_len = text.len(); + /// let utf16_len = text.par_encode_utf16().count(); + /// assert!(utf16_len <= utf8_len); + /// ``` + fn par_encode_utf16(&self) -> EncodeUtf16<'_> { + EncodeUtf16 { + chars: self.as_parallel_string(), + } + } + + /// Returns a parallel iterator over substrings separated by a + /// given character or predicate, similar to `str::split`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let total = "1, 2, buckle, 3, 4, door" + /// .par_split(',') + /// .filter_map(|s| s.trim().parse::().ok()) + /// .sum(); + /// assert_eq!(10, total); + /// ``` + fn par_split(&self, separator: P) -> Split<'_, P> { + Split::new(self.as_parallel_string(), separator) + } + + /// Returns a parallel iterator over substrings terminated by a + /// given character or predicate, similar to `str::split_terminator`. + /// It's equivalent to `par_split`, except it doesn't produce an empty + /// substring after a trailing terminator. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let parts: Vec<_> = "((1 + 3) * 2)" + /// .par_split_terminator(|c| c == '(' || c == ')') + /// .collect(); + /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts); + /// ``` + fn par_split_terminator(&self, terminator: P) -> SplitTerminator<'_, P> { + SplitTerminator::new(self.as_parallel_string(), terminator) + } + + /// Returns a parallel iterator over the lines of a string, ending with an + /// optional carriage return and with a newline (`\r\n` or just `\n`). + /// The final line ending is optional, and line endings are not included in + /// the output strings. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let lengths: Vec<_> = "hello world\nfizbuzz" + /// .par_lines() + /// .map(|l| l.len()) + /// .collect(); + /// assert_eq!(vec![11, 7], lengths); + /// ``` + fn par_lines(&self) -> Lines<'_> { + Lines(self.as_parallel_string()) + } + + /// Returns a parallel iterator over the sub-slices of a string that are + /// separated by any amount of whitespace. + /// + /// As with `str::split_whitespace`, 'whitespace' is defined according to + /// the terms of the Unicode Derived Core Property `White_Space`. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let longest = "which is the longest word?" + /// .par_split_whitespace() + /// .max_by_key(|word| word.len()); + /// assert_eq!(Some("longest"), longest); + /// ``` + fn par_split_whitespace(&self) -> SplitWhitespace<'_> { + SplitWhitespace(self.as_parallel_string()) + } + + /// Returns a parallel iterator over substrings that match a + /// given character or predicate, similar to `str::matches`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let total = "1, 2, buckle, 3, 4, door" + /// .par_matches(char::is_numeric) + /// .map(|s| s.parse::().expect("digit")) + /// .sum(); + /// assert_eq!(10, total); + /// ``` + fn par_matches(&self, pattern: P) -> Matches<'_, P> { + Matches { + chars: self.as_parallel_string(), + pattern, + } + } + + /// Returns a parallel iterator over substrings that match a given character + /// or predicate, with their positions, similar to `str::match_indices`. + /// + /// Note: the `Pattern` trait is private, for use only by Rayon itself. + /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`. 
+ /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door" + /// .par_match_indices(char::is_numeric) + /// .collect(); + /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]); + /// ``` + fn par_match_indices(&self, pattern: P) -> MatchIndices<'_, P> { + MatchIndices { + chars: self.as_parallel_string(), + pattern, + } + } +} + +impl ParallelString for str { + #[inline] + fn as_parallel_string(&self) -> &str { + self + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// We hide the `Pattern` trait in a private module, as its API is not meant +/// for general consumption. If we could have privacy on trait items, then it +/// would be nicer to have its basic existence and implementors public while +/// keeping all of the methods private. +mod private { + use crate::iter::plumbing::Folder; + + /// Pattern-matching trait for `ParallelString`, somewhat like a mix of + /// `std::str::pattern::{Pattern, Searcher}`. + /// + /// Implementing this trait is not permitted outside of `rayon`. + pub trait Pattern: Sized + Sync + Send { + private_decl! {} + fn find_in(&self, haystack: &str) -> Option; + fn rfind_in(&self, haystack: &str) -> Option; + fn is_suffix_of(&self, haystack: &str) -> bool; + fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F + where + F: Folder<&'ch str>; + fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F + where + F: Folder<&'ch str>; + fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F + where + F: Folder<(usize, &'ch str)>; + } +} +use self::private::Pattern; + +#[inline] +fn offset(base: usize) -> impl Fn((usize, T)) -> (usize, T) { + move |(i, x)| (base + i, x) +} + +impl Pattern for char { + private_impl! 
{} + + #[inline] + fn find_in(&self, chars: &str) -> Option { + chars.find(*self) + } + + #[inline] + fn rfind_in(&self, chars: &str) -> Option { + chars.rfind(*self) + } + + #[inline] + fn is_suffix_of(&self, chars: &str) -> bool { + chars.ends_with(*self) + } + + fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F + where + F: Folder<&'ch str>, + { + let mut split = chars.split(*self); + if skip_last { + split.next_back(); + } + folder.consume_iter(split) + } + + fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F + where + F: Folder<&'ch str>, + { + folder.consume_iter(chars.matches(*self)) + } + + fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F + where + F: Folder<(usize, &'ch str)>, + { + folder.consume_iter(chars.match_indices(*self).map(offset(base))) + } +} + +impl bool> Pattern for FN { + private_impl! {} + + fn find_in(&self, chars: &str) -> Option { + chars.find(self) + } + + fn rfind_in(&self, chars: &str) -> Option { + chars.rfind(self) + } + + fn is_suffix_of(&self, chars: &str) -> bool { + chars.ends_with(self) + } + + fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F + where + F: Folder<&'ch str>, + { + let mut split = chars.split(self); + if skip_last { + split.next_back(); + } + folder.consume_iter(split) + } + + fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F + where + F: Folder<&'ch str>, + { + folder.consume_iter(chars.matches(self)) + } + + fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F + where + F: Folder<(usize, &'ch str)>, + { + folder.consume_iter(chars.match_indices(self).map(offset(base))) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the characters of a string +#[derive(Debug, Clone)] +pub struct Chars<'ch> { + chars: &'ch str, +} + +struct CharsProducer<'ch> { + chars: &'ch str, +} + +impl<'ch> ParallelIterator 
for Chars<'ch> { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(CharsProducer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for CharsProducer<'ch> { + type Item = char; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + CharsProducer { chars: left }, + Some(CharsProducer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.chars()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the characters of a string, with their positions +#[derive(Debug, Clone)] +pub struct CharIndices<'ch> { + chars: &'ch str, +} + +struct CharIndicesProducer<'ch> { + index: usize, + chars: &'ch str, +} + +impl<'ch> ParallelIterator for CharIndices<'ch> { + type Item = (usize, char); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = CharIndicesProducer { + index: 0, + chars: self.chars, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> { + type Item = (usize, char); + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + CharIndicesProducer { + chars: left, + ..self + }, + Some(CharIndicesProducer { + chars: right, + index: self.index + left.len(), + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + let base = self.index; + folder.consume_iter(self.chars.char_indices().map(offset(base))) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over the bytes of a string +#[derive(Debug, Clone)] +pub struct Bytes<'ch> { + chars: &'ch str, +} + +struct BytesProducer<'ch> { + chars: &'ch str, +} + +impl<'ch> 
ParallelIterator for Bytes<'ch> { + type Item = u8; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(BytesProducer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for BytesProducer<'ch> { + type Item = u8; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + BytesProducer { chars: left }, + Some(BytesProducer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.bytes()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over a string encoded as UTF-16 +#[derive(Debug, Clone)] +pub struct EncodeUtf16<'ch> { + chars: &'ch str, +} + +struct EncodeUtf16Producer<'ch> { + chars: &'ch str, +} + +impl<'ch> ParallelIterator for EncodeUtf16<'ch> { + type Item = u16; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer) + } +} + +impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> { + type Item = u16; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + EncodeUtf16Producer { chars: left }, + Some(EncodeUtf16Producer { chars: right }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.chars.encode_utf16()) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by a pattern +#[derive(Debug, Clone)] +pub struct Split<'ch, P: Pattern> { + chars: &'ch str, + separator: P, +} + +impl<'ch, P: Pattern> Split<'ch, P> { + fn new(chars: &'ch str, separator: P) -> Self { + Split { chars, separator } + } +} + +impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> { + type Item = &'ch str; + 
+ fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitProducer::new(self.chars, &self.separator); + bridge_unindexed(producer, consumer) + } +} + +/// Implement support for `SplitProducer`. +impl<'ch, P: Pattern> Fissile

for &'ch str { + fn length(&self) -> usize { + self.len() + } + + fn midpoint(&self, end: usize) -> usize { + // First find a suitable UTF-8 boundary. + find_char_midpoint(&self[..end]) + } + + fn find(&self, separator: &P, start: usize, end: usize) -> Option { + separator.find_in(&self[start..end]) + } + + fn rfind(&self, separator: &P, end: usize) -> Option { + separator.rfind_in(&self[..end]) + } + + fn split_once(self, index: usize) -> (Self, Self) { + let (left, right) = self.split_at(index); + let mut right_iter = right.chars(); + right_iter.next(); // skip the separator + (left, right_iter.as_str()) + } + + fn fold_splits(self, separator: &P, folder: F, skip_last: bool) -> F + where + F: Folder, + { + separator.fold_splits(self, folder, skip_last) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by a terminator pattern +#[derive(Debug, Clone)] +pub struct SplitTerminator<'ch, P: Pattern> { + chars: &'ch str, + terminator: P, +} + +struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> { + splitter: SplitProducer<'sep, P, &'ch str>, + skip_last: bool, +} + +impl<'ch, P: Pattern> SplitTerminator<'ch, P> { + fn new(chars: &'ch str, terminator: P) -> Self { + SplitTerminator { chars, terminator } + } +} + +impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> { + fn new(chars: &'ch str, terminator: &'sep P) -> Self { + SplitTerminatorProducer { + splitter: SplitProducer::new(chars, terminator), + skip_last: chars.is_empty() || terminator.is_suffix_of(chars), + } + } +} + +impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = SplitTerminatorProducer::new(self.chars, &self.terminator); + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for 
SplitTerminatorProducer<'ch, 'sep, P> { + type Item = &'ch str; + + fn split(mut self) -> (Self, Option) { + let (left, right) = self.splitter.split(); + self.splitter = left; + let right = right.map(|right| { + let skip_last = self.skip_last; + self.skip_last = false; + SplitTerminatorProducer { + splitter: right, + skip_last, + } + }); + (self, right) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.splitter.fold_with(folder, self.skip_last) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over lines in a string +#[derive(Debug, Clone)] +pub struct Lines<'ch>(&'ch str); + +#[inline] +fn no_carriage_return(line: &str) -> &str { + if line.ends_with('\r') { + &line[..line.len() - 1] + } else { + line + } +} + +impl<'ch> ParallelIterator for Lines<'ch> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.0 + .par_split_terminator('\n') + .map(no_carriage_return) + .drive_unindexed(consumer) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings separated by whitespace +#[derive(Debug, Clone)] +pub struct SplitWhitespace<'ch>(&'ch str); + +#[inline] +fn not_empty(s: &&str) -> bool { + !s.is_empty() +} + +impl<'ch> ParallelIterator for SplitWhitespace<'ch> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.0 + .par_split(char::is_whitespace) + .filter(not_empty) + .drive_unindexed(consumer) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings that match a pattern +#[derive(Debug, Clone)] +pub struct Matches<'ch, P: Pattern> { + chars: &'ch str, + pattern: P, +} + +struct MatchesProducer<'ch, 'pat, P: Pattern> { + chars: &'ch str, + pattern: &'pat P, +} + +impl<'ch, P: Pattern> ParallelIterator 
for Matches<'ch, P> { + type Item = &'ch str; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = MatchesProducer { + chars: self.chars, + pattern: &self.pattern, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> { + type Item = &'ch str; + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + MatchesProducer { + chars: left, + ..self + }, + Some(MatchesProducer { + chars: right, + ..self + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.pattern.fold_matches(self.chars, folder) + } +} + +// ///////////////////////////////////////////////////////////////////////// + +/// Parallel iterator over substrings that match a pattern, with their positions +#[derive(Debug, Clone)] +pub struct MatchIndices<'ch, P: Pattern> { + chars: &'ch str, + pattern: P, +} + +struct MatchIndicesProducer<'ch, 'pat, P: Pattern> { + index: usize, + chars: &'ch str, + pattern: &'pat P, +} + +impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> { + type Item = (usize, &'ch str); + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let producer = MatchIndicesProducer { + index: 0, + chars: self.chars, + pattern: &self.pattern, + }; + bridge_unindexed(producer, consumer) + } +} + +impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> { + type Item = (usize, &'ch str); + + fn split(self) -> (Self, Option) { + match split(self.chars) { + Some((left, right)) => ( + MatchIndicesProducer { + chars: left, + ..self + }, + Some(MatchIndicesProducer { + chars: right, + index: self.index + left.len(), + ..self + }), + ), + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.pattern + .fold_match_indices(self.chars, folder, self.index) + } 
+} diff --git a/src/string.rs b/src/string.rs new file mode 100644 index 0000000..91e69f9 --- /dev/null +++ b/src/string.rs @@ -0,0 +1,48 @@ +//! This module contains the parallel iterator types for owned strings +//! (`String`). You will rarely need to interact with it directly +//! unless you have need to name one of the iterator types. + +use crate::iter::plumbing::*; +use crate::math::simplify_range; +use crate::prelude::*; +use std::ops::{Range, RangeBounds}; + +impl<'a> ParallelDrainRange for &'a mut String { + type Iter = Drain<'a>; + type Item = char; + + fn par_drain>(self, range: R) -> Self::Iter { + Drain { + range: simplify_range(range, self.len()), + string: self, + } + } +} + +/// Draining parallel iterator that moves a range of characters out of a string, +/// but keeps the total capacity. +#[derive(Debug)] +pub struct Drain<'a> { + string: &'a mut String, + range: Range, +} + +impl<'a> ParallelIterator for Drain<'a> { + type Item = char; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + self.string[self.range.clone()] + .par_chars() + .drive_unindexed(consumer) + } +} + +impl<'a> Drop for Drain<'a> { + fn drop(&mut self) { + // Remove the drained range. + self.string.drain(self.range.clone()); + } +} diff --git a/src/vec.rs b/src/vec.rs new file mode 100644 index 0000000..686673b --- /dev/null +++ b/src/vec.rs @@ -0,0 +1,245 @@ +//! Parallel iterator types for [vectors][std::vec] (`Vec`) +//! +//! You will rarely need to interact with this module directly unless you need +//! to name one of the iterator types. +//! +//! [std::vec]: https://doc.rust-lang.org/stable/std/vec/ + +use crate::iter::plumbing::*; +use crate::iter::*; +use crate::math::simplify_range; +use std::iter; +use std::mem; +use std::ops::{Range, RangeBounds}; +use std::ptr; +use std::slice; + +/// Parallel iterator that moves out of a vector. 
+#[derive(Debug, Clone)] +pub struct IntoIter { + vec: Vec, +} + +impl IntoParallelIterator for Vec { + type Item = T; + type Iter = IntoIter; + + fn into_par_iter(self) -> Self::Iter { + IntoIter { vec: self } + } +} + +impl ParallelIterator for IntoIter { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl IndexedParallelIterator for IntoIter { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.vec.len() + } + + fn with_producer(mut self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + // Drain every item, and then the vector only needs to free its buffer. + self.vec.par_drain(..).with_producer(callback) + } +} + +impl<'data, T: Send> ParallelDrainRange for &'data mut Vec { + type Iter = Drain<'data, T>; + type Item = T; + + fn par_drain>(self, range: R) -> Self::Iter { + Drain { + orig_len: self.len(), + range: simplify_range(range, self.len()), + vec: self, + } + } +} + +/// Draining parallel iterator that moves a range out of a vector, but keeps the total capacity. 
+#[derive(Debug)] +pub struct Drain<'data, T: Send> { + vec: &'data mut Vec, + range: Range, + orig_len: usize, +} + +impl<'data, T: Send> ParallelIterator for Drain<'data, T> { + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.len()) + } +} + +impl<'data, T: Send> IndexedParallelIterator for Drain<'data, T> { + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + bridge(self, consumer) + } + + fn len(&self) -> usize { + self.range.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + unsafe { + // Make the vector forget about the drained items, and temporarily the tail too. + let start = self.range.start; + self.vec.set_len(start); + + // Get a correct borrow lifetime, then extend it to the original length. + let mut slice = &mut self.vec[start..]; + slice = slice::from_raw_parts_mut(slice.as_mut_ptr(), self.range.len()); + + // The producer will move or drop each item from the drained range. + callback.callback(DrainProducer::new(slice)) + } + } +} + +impl<'data, T: Send> Drop for Drain<'data, T> { + fn drop(&mut self) { + if self.range.len() > 0 { + let Range { start, end } = self.range; + if self.vec.len() != start { + // We must not have produced, so just call a normal drain to remove the items. + assert_eq!(self.vec.len(), self.orig_len); + self.vec.drain(start..end); + } else if end < self.orig_len { + // The producer was responsible for consuming the drained items. + // Move the tail items to their new place, then set the length to include them. 
+ unsafe { + let ptr = self.vec.as_mut_ptr().add(start); + let tail_ptr = self.vec.as_ptr().add(end); + let tail_len = self.orig_len - end; + ptr::copy(tail_ptr, ptr, tail_len); + self.vec.set_len(start + tail_len); + } + } + } + } +} + +/// //////////////////////////////////////////////////////////////////////// + +pub(crate) struct DrainProducer<'data, T: Send> { + slice: &'data mut [T], +} + +impl<'data, T: 'data + Send> DrainProducer<'data, T> { + /// Creates a draining producer, which *moves* items from the slice. + /// + /// Unsafe bacause `!Copy` data must not be read after the borrow is released. + pub(crate) unsafe fn new(slice: &'data mut [T]) -> Self { + DrainProducer { slice } + } +} + +impl<'data, T: 'data + Send> Producer for DrainProducer<'data, T> { + type Item = T; + type IntoIter = SliceDrain<'data, T>; + + fn into_iter(mut self) -> Self::IntoIter { + // replace the slice so we don't drop it twice + let slice = mem::replace(&mut self.slice, &mut []); + SliceDrain { + iter: slice.iter_mut(), + } + } + + fn split_at(mut self, index: usize) -> (Self, Self) { + // replace the slice so we don't drop it twice + let slice = mem::replace(&mut self.slice, &mut []); + let (left, right) = slice.split_at_mut(index); + unsafe { (DrainProducer::new(left), DrainProducer::new(right)) } + } +} + +impl<'data, T: 'data + Send> Drop for DrainProducer<'data, T> { + fn drop(&mut self) { + // use `Drop for [T]` + unsafe { ptr::drop_in_place(self.slice) }; + } +} + +/// //////////////////////////////////////////////////////////////////////// + +// like std::vec::Drain, without updating a source Vec +pub(crate) struct SliceDrain<'data, T> { + iter: slice::IterMut<'data, T>, +} + +impl<'data, T: 'data> Iterator for SliceDrain<'data, T> { + type Item = T; + + fn next(&mut self) -> Option { + let ptr = self.iter.next()?; + Some(unsafe { ptr::read(ptr) }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + fn count(self) -> usize { + 
self.iter.len() + } +} + +impl<'data, T: 'data> DoubleEndedIterator for SliceDrain<'data, T> { + fn next_back(&mut self) -> Option { + let ptr = self.iter.next_back()?; + Some(unsafe { ptr::read(ptr) }) + } +} + +impl<'data, T: 'data> ExactSizeIterator for SliceDrain<'data, T> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'data, T: 'data> iter::FusedIterator for SliceDrain<'data, T> {} + +impl<'data, T: 'data> Drop for SliceDrain<'data, T> { + fn drop(&mut self) { + // extract the iterator so we can use `Drop for [T]` + let iter = mem::replace(&mut self.iter, [].iter_mut()); + unsafe { ptr::drop_in_place(iter.into_slice()) }; + } +} -- cgit v1.2.3

+where + P: Producer, + T: 'a + Clone, +{ + type Item = T; + type IntoIter = iter::Cloned; + + fn into_iter(self) -> Self::IntoIter { + self.base.into_iter().cloned() + } + + fn min_len(&self) -> usize { + self.base.min_len() + } + + fn max_len(&self) -> usize { + self.base.max_len() + } + + fn split_at(self, index: usize) -> (Self, Self) { + let (left, right) = self.base.split_at(index); + ( + ClonedProducer { base: left }, + ClonedProducer { base: right }, + ) + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + self.base.fold_with(ClonedFolder { base: folder }).base + } +} + +/// //////////////////////////////////////////////////////////////////////// +/// Consumer implementation + +struct ClonedConsumer { + base: C, +} + +impl ClonedConsumer { + fn new(base: C) -> Self { + ClonedConsumer { base } + } +} + +impl<'a, T, C> Consumer<&'a T> for ClonedConsumer +where + C: Consumer, + T: 'a + Clone, +{ + type Folder = ClonedFolder; + type Reducer = C::Reducer; + type Result = C::Result; + + fn split_at(self, index: usize) -> (Self, Self, Self::Reducer) { + let (left, right, reducer) = self.base.split_at(index); + ( + ClonedConsumer::new(left), + ClonedConsumer::new(right), + reducer, + ) + } + + fn into_folder(self) -> Self::Folder { + ClonedFolder { + base: self.base.into_folder(), + } + } + + fn full(&self) -> bool { + self.base.full() + } +} + +impl<'a, T, C> UnindexedConsumer<&'a T> for ClonedConsumer +where + C: UnindexedConsumer, + T: 'a + Clone, +{ + fn split_off_left(&self) -> Self { + ClonedConsumer::new(self.base.split_off_left()) + } + + fn to_reducer(&self) -> Self::Reducer { + self.base.to_reducer() + } +} + +struct ClonedFolder { + base: F, +} + +impl<'a, T, F> Folder<&'a T> for ClonedFolder +where + F: Folder, + T: 'a + Clone, +{ + type Result = F::Result; + + fn consume(self, item: &'a T) -> Self { + ClonedFolder { + base: self.base.consume(item.clone()), + } + } + + fn consume_iter(mut self, iter: I) -> Self + where + I: 
IntoIterator, + { + self.base = self.base.consume_iter(iter.into_iter().cloned()); + self + } + + fn complete(self) -> F::Result { + self.base.complete() + } + + fn full(&self) -> bool { + self.base.full() + } +} diff --git a/src/iter/collect/consumer.rs b/src/iter/collect/consumer.rs new file mode 100644 index 0000000..689f29c --- /dev/null +++ b/src/iter/collect/consumer.rs @@ -0,0 +1,159 @@ +use super::super::plumbing::*; +use std::marker::PhantomData; +use std::ptr; +use std::slice; + +pub(super) struct CollectConsumer<'c, T: Send> { + /// A slice covering the target memory, not yet initialized! + target: &'c mut [T], +} + +pub(super) struct CollectFolder<'c, T: Send> { + /// The folder writes into `result` and must extend the result + /// up to exactly this number of elements. + final_len: usize, + + /// The current written-to part of our slice of the target + result: CollectResult<'c, T>, +} + +impl<'c, T: Send + 'c> CollectConsumer<'c, T> { + /// The target memory is considered uninitialized, and will be + /// overwritten without reading or dropping existing values. + pub(super) fn new(target: &'c mut [T]) -> Self { + CollectConsumer { target } + } +} + +/// CollectResult represents an initialized part of the target slice. +/// +/// This is a proxy owner of the elements in the slice; when it drops, +/// the elements will be dropped, unless its ownership is released before then. 
+#[must_use] +pub(super) struct CollectResult<'c, T> { + start: *mut T, + len: usize, + invariant_lifetime: PhantomData<&'c mut &'c mut [T]>, +} + +unsafe impl<'c, T> Send for CollectResult<'c, T> where T: Send {} + +impl<'c, T> CollectResult<'c, T> { + /// The current length of the collect result + pub(super) fn len(&self) -> usize { + self.len + } + + /// Release ownership of the slice of elements, and return the length + pub(super) fn release_ownership(mut self) -> usize { + let ret = self.len; + self.len = 0; + ret + } +} + +impl<'c, T> Drop for CollectResult<'c, T> { + fn drop(&mut self) { + // Drop the first `self.len` elements, which have been recorded + // to be initialized by the folder. + unsafe { + ptr::drop_in_place(slice::from_raw_parts_mut(self.start, self.len)); + } + } +} + +impl<'c, T: Send + 'c> Consumer for CollectConsumer<'c, T> { + type Folder = CollectFolder<'c, T>; + type Reducer = CollectReducer; + type Result = CollectResult<'c, T>; + + fn split_at(self, index: usize) -> (Self, Self, CollectReducer) { + let CollectConsumer { target } = self; + + // Produce new consumers. Normal slicing ensures that the + // memory range given to each consumer is disjoint. + let (left, right) = target.split_at_mut(index); + ( + CollectConsumer::new(left), + CollectConsumer::new(right), + CollectReducer, + ) + } + + fn into_folder(self) -> CollectFolder<'c, T> { + // Create a folder that consumes values and writes them + // into target. The initial result has length 0. 
+ CollectFolder { + final_len: self.target.len(), + result: CollectResult { + start: self.target.as_mut_ptr(), + len: 0, + invariant_lifetime: PhantomData, + }, + } + } + + fn full(&self) -> bool { + false + } +} + +impl<'c, T: Send + 'c> Folder for CollectFolder<'c, T> { + type Result = CollectResult<'c, T>; + + fn consume(mut self, item: T) -> CollectFolder<'c, T> { + if self.result.len >= self.final_len { + panic!("too many values pushed to consumer"); + } + + // Compute target pointer and write to it, and + // extend the current result by one element + unsafe { + self.result.start.add(self.result.len).write(item); + self.result.len += 1; + } + + self + } + + fn complete(self) -> Self::Result { + // NB: We don't explicitly check that the local writes were complete, + // but Collect will assert the total result length in the end. + self.result + } + + fn full(&self) -> bool { + false + } +} + +/// Pretend to be unindexed for `special_collect_into_vec`, +/// but we should never actually get used that way... +impl<'c, T: Send + 'c> UnindexedConsumer for CollectConsumer<'c, T> { + fn split_off_left(&self) -> Self { + unreachable!("CollectConsumer must be indexed!") + } + fn to_reducer(&self) -> Self::Reducer { + CollectReducer + } +} + +/// CollectReducer combines adjacent chunks; the result must always +/// be contiguous so that it is one combined slice. +pub(super) struct CollectReducer; + +impl<'c, T> Reducer> for CollectReducer { + fn reduce( + self, + mut left: CollectResult<'c, T>, + right: CollectResult<'c, T>, + ) -> CollectResult<'c, T> { + // Merge if the CollectResults are adjacent and in left to right order + // else: drop the right piece now and total length will end up short in the end, + // when the correctness of the collected result is asserted. 
+ if left.start.wrapping_add(left.len) == right.start { + left.len += right.release_ownership(); + } + left + } +} diff --git a/src/iter/collect/mod.rs b/src/iter/collect/mod.rs new file mode 100644 index 0000000..e18298e --- /dev/null +++ b/src/iter/collect/mod.rs @@ -0,0 +1,171 @@ +use super::{IndexedParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator}; +use std::slice; + +mod consumer; +use self::consumer::CollectConsumer; +use self::consumer::CollectResult; +use super::unzip::unzip_indexed; + +mod test; + +/// Collects the results of the exact iterator into the specified vector. +/// +/// This is called by `IndexedParallelIterator::collect_into_vec`. +pub(super) fn collect_into_vec(pi: I, v: &mut Vec) +where + I: IndexedParallelIterator, + T: Send, +{ + v.truncate(0); // clear any old data + let len = pi.len(); + Collect::new(v, len).with_consumer(|consumer| pi.drive(consumer)); +} + +/// Collects the results of the iterator into the specified vector. +/// +/// Technically, this only works for `IndexedParallelIterator`, but we're faking a +/// bit of specialization here until Rust can do that natively. Callers are +/// using `opt_len` to find the length before calling this, and only exact +/// iterators will return anything but `None` there. +/// +/// Since the type system doesn't understand that contract, we have to allow +/// *any* `ParallelIterator` here, and `CollectConsumer` has to also implement +/// `UnindexedConsumer`. That implementation panics `unreachable!` in case +/// there's a bug where we actually do try to use this unindexed. +fn special_extend(pi: I, len: usize, v: &mut Vec) +where + I: ParallelIterator, + T: Send, +{ + Collect::new(v, len).with_consumer(|consumer| pi.drive_unindexed(consumer)); +} + +/// Unzips the results of the exact iterator into the specified vectors. +/// +/// This is called by `IndexedParallelIterator::unzip_into_vecs`. 
+pub(super) fn unzip_into_vecs(pi: I, left: &mut Vec, right: &mut Vec) +where + I: IndexedParallelIterator, + A: Send, + B: Send, +{ + // clear any old data + left.truncate(0); + right.truncate(0); + + let len = pi.len(); + Collect::new(right, len).with_consumer(|right_consumer| { + let mut right_result = None; + Collect::new(left, len).with_consumer(|left_consumer| { + let (left_r, right_r) = unzip_indexed(pi, left_consumer, right_consumer); + right_result = Some(right_r); + left_r + }); + right_result.unwrap() + }); +} + +/// Manage the collection vector. +struct Collect<'c, T: Send> { + vec: &'c mut Vec, + len: usize, +} + +impl<'c, T: Send + 'c> Collect<'c, T> { + fn new(vec: &'c mut Vec, len: usize) -> Self { + Collect { vec, len } + } + + /// Create a consumer on the slice of memory we are collecting into. + /// + /// The consumer needs to be used inside the scope function, and the + /// complete collect result passed back. + /// + /// This method will verify the collect result, and panic if the slice + /// was not fully written into. Otherwise, in the successful case, + /// the vector is complete with the collected result. + fn with_consumer(mut self, scope_fn: F) + where + F: FnOnce(CollectConsumer<'_, T>) -> CollectResult<'_, T>, + { + unsafe { + let slice = Self::reserve_get_tail_slice(&mut self.vec, self.len); + let result = scope_fn(CollectConsumer::new(slice)); + + // The CollectResult represents a contiguous part of the + // slice, that has been written to. + // On unwind here, the CollectResult will be dropped. + // If some producers on the way did not produce enough elements, + // partial CollectResults may have been dropped without + // being reduced to the final result, and we will see + // that as the length coming up short. + // + // Here, we assert that `slice` is fully initialized. 
This is + // checked by the following assert, which verifies if a + // complete CollectResult was produced; if the length is + // correct, it is necessarily covering the target slice. + // Since we know that the consumer cannot have escaped from + // `drive` (by parametricity, essentially), we know that any + // stores that will happen, have happened. Unless some code is buggy, + // that means we should have seen `len` total writes. + let actual_writes = result.len(); + assert!( + actual_writes == self.len, + "expected {} total writes, but got {}", + self.len, + actual_writes + ); + + // Release the result's mutable borrow and "proxy ownership" + // of the elements, before the vector takes it over. + result.release_ownership(); + + let new_len = self.vec.len() + self.len; + self.vec.set_len(new_len); + } + } + + /// Reserve space for `len` more elements in the vector, + /// and return a slice to the uninitialized tail of the vector + /// + /// Safety: The tail slice is uninitialized + unsafe fn reserve_get_tail_slice(vec: &mut Vec, len: usize) -> &mut [T] { + // Reserve the new space. + vec.reserve(len); + + // Get a correct borrow, then extend it for the newly added length. + let start = vec.len(); + let slice = &mut vec[start..]; + slice::from_raw_parts_mut(slice.as_mut_ptr(), len) + } +} + +/// Extends a vector with items from a parallel iterator. +impl ParallelExtend for Vec +where + T: Send, +{ + fn par_extend(&mut self, par_iter: I) + where + I: IntoParallelIterator, + { + // See the vec_collect benchmarks in rayon-demo for different strategies. + let par_iter = par_iter.into_par_iter(); + match par_iter.opt_len() { + Some(len) => { + // When Rust gets specialization, we can get here for indexed iterators + // without relying on `opt_len`. Until then, `special_extend()` fakes + // an unindexed mode on the promise that `opt_len()` is accurate. 
+ special_extend(par_iter, len, self); + } + None => { + // This works like `extend`, but `Vec::append` is more efficient. + let list = super::extend::collect(par_iter); + self.reserve(super::extend::len(&list)); + for mut vec in list { + self.append(&mut vec); + } + } + } + } +} diff --git a/src/iter/collect/test.rs b/src/iter/collect/test.rs new file mode 100644 index 0000000..00c16c4 --- /dev/null +++ b/src/iter/collect/test.rs @@ -0,0 +1,385 @@ +#![cfg(test)] +#![allow(unused_assignments)] + +// These tests are primarily targeting "abusive" producers that will +// try to drive the "collect consumer" incorrectly. These should +// result in panics. + +use super::Collect; +use crate::iter::plumbing::*; +use rayon_core::join; + +use std::fmt; +use std::panic; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::thread::Result as ThreadResult; + +/// Promises to produce 2 items, but then produces 3. Does not do any +/// splits at all. +#[test] +#[should_panic(expected = "too many values")] +fn produce_too_many_items() { + let mut v = vec![]; + Collect::new(&mut v, 2).with_consumer(|consumer| { + let mut folder = consumer.into_folder(); + folder = folder.consume(22); + folder = folder.consume(23); + folder.consume(24); + unreachable!("folder does not complete") + }); +} + +/// Produces fewer items than promised. Does not do any +/// splits at all. +#[test] +#[should_panic(expected = "expected 5 total writes, but got 2")] +fn produce_fewer_items() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 5); + collect.with_consumer(|consumer| { + let mut folder = consumer.into_folder(); + folder = folder.consume(22); + folder = folder.consume(23); + folder.complete() + }); +} + +// Complete is not called by the consumer. Hence,the collection vector is not fully initialized. 
+#[test] +#[should_panic(expected = "expected 4 total writes, but got 2")] +fn left_produces_items_with_no_complete() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3); + right_folder.complete() + }); +} + +// Complete is not called by the right consumer. Hence,the +// collection vector is not fully initialized. +#[test] +#[should_panic(expected = "expected 4 total writes, but got 2")] +fn right_produces_items_with_no_complete() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3); + left_folder.complete() + }); +} + +// Complete is not called by the consumer. Hence,the collection vector is not fully initialized. +#[test] +fn produces_items_with_no_complete() { + let counter = DropCounter::default(); + let mut v = vec![]; + let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let collect = Collect::new(&mut v, 2); + collect.with_consumer(|consumer| { + let mut folder = consumer.into_folder(); + folder = folder.consume(counter.element()); + folder = folder.consume(counter.element()); + panic!("folder does not complete"); + }); + })); + assert!(v.is_empty()); + assert_is_panic_with_message(&panic_result, "folder does not complete"); + counter.assert_drop_count(); +} + +// The left consumer produces too many items while the right +// consumer produces correct number. 
+#[test] +#[should_panic(expected = "too many values")] +fn left_produces_too_many_items() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1).consume(2); + right_folder = right_folder.consume(2).consume(3); + let _ = right_folder.complete(); + unreachable!("folder does not complete"); + }); +} + +// The right consumer produces too many items while the left +// consumer produces correct number. +#[test] +#[should_panic(expected = "too many values")] +fn right_produces_too_many_items() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3).consume(4); + let _ = left_folder.complete(); + unreachable!("folder does not complete"); + }); +} + +// The left consumer produces fewer items while the right +// consumer produces correct number. 
+#[test] +#[should_panic(expected = "expected 4 total writes, but got 1")] +fn left_produces_fewer_items() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0); + right_folder = right_folder.consume(2).consume(3); + let left_result = left_folder.complete(); + let right_result = right_folder.complete(); + reducer.reduce(left_result, right_result) + }); +} + +// The left and right consumer produce the correct number but +// only left result is returned +#[test] +#[should_panic(expected = "expected 4 total writes, but got 2")] +fn only_left_result() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3); + let left_result = left_folder.complete(); + let _ = right_folder.complete(); + left_result + }); +} + +// The left and right consumer produce the correct number but +// only right result is returned +#[test] +#[should_panic(expected = "expected 4 total writes, but got 2")] +fn only_right_result() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3); + let _ = left_folder.complete(); + right_folder.complete() + }); +} + +// The left and 
right consumer produce the correct number but reduce +// in the wrong order. +#[test] +#[should_panic(expected = "expected 4 total writes, but got 2")] +fn reducer_does_not_preserve_order() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2).consume(3); + let left_result = left_folder.complete(); + let right_result = right_folder.complete(); + reducer.reduce(right_result, left_result) + }); +} + +// The right consumer produces fewer items while the left +// consumer produces correct number. +#[test] +#[should_panic(expected = "expected 4 total writes, but got 3")] +fn right_produces_fewer_items() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(0).consume(1); + right_folder = right_folder.consume(2); + let left_result = left_folder.complete(); + let right_result = right_folder.complete(); + reducer.reduce(left_result, right_result) + }); +} + +// The left consumer panics and the right stops short, like `panic_fuse()`. +// We should get the left panic without finishing `Collect::with_consumer`. 
+#[test] +#[should_panic(expected = "left consumer panic")] +fn left_panics() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let (left_result, right_result) = join( + || { + let mut left_folder = left_consumer.into_folder(); + left_folder = left_folder.consume(0); + panic!("left consumer panic"); + }, + || { + let mut right_folder = right_consumer.into_folder(); + right_folder = right_folder.consume(2); + right_folder.complete() // early return + }, + ); + reducer.reduce(left_result, right_result) + }); + unreachable!(); +} + +// The right consumer panics and the left stops short, like `panic_fuse()`. +// We should get the right panic without finishing `Collect::with_consumer`. +#[test] +#[should_panic(expected = "right consumer panic")] +fn right_panics() { + let mut v = vec![]; + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, right_consumer, _) = consumer.split_at(2); + let (left_result, right_result) = join( + || { + let mut left_folder = left_consumer.into_folder(); + left_folder = left_folder.consume(0); + left_folder.complete() // early return + }, + || { + let mut right_folder = right_consumer.into_folder(); + right_folder = right_folder.consume(2); + panic!("right consumer panic"); + }, + ); + reducer.reduce(left_result, right_result) + }); + unreachable!(); +} + +// The left consumer produces fewer items while the right +// consumer produces correct number; check that created elements are dropped +#[test] +fn left_produces_fewer_items_drops() { + let counter = DropCounter::default(); + let mut v = vec![]; + let panic_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let collect = Collect::new(&mut v, 4); + collect.with_consumer(|consumer| { + let reducer = consumer.to_reducer(); + let (left_consumer, 
right_consumer, _) = consumer.split_at(2); + let mut left_folder = left_consumer.into_folder(); + let mut right_folder = right_consumer.into_folder(); + left_folder = left_folder.consume(counter.element()); + right_folder = right_folder + .consume(counter.element()) + .consume(counter.element()); + let left_result = left_folder.complete(); + let right_result = right_folder.complete(); + reducer.reduce(left_result, right_result) + }); + })); + assert!(v.is_empty()); + assert_is_panic_with_message(&panic_result, "expected 4 total writes, but got 1"); + counter.assert_drop_count(); +} + +/// This counter can create elements, and then count and verify +/// the number of which have actually been dropped again. +#[derive(Default)] +struct DropCounter { + created: AtomicUsize, + dropped: AtomicUsize, +} + +struct Element<'a>(&'a AtomicUsize); + +impl DropCounter { + fn created(&self) -> usize { + self.created.load(Ordering::SeqCst) + } + + fn dropped(&self) -> usize { + self.dropped.load(Ordering::SeqCst) + } + + fn element(&self) -> Element<'_> { + self.created.fetch_add(1, Ordering::SeqCst); + Element(&self.dropped) + } + + fn assert_drop_count(&self) { + assert_eq!( + self.created(), + self.dropped(), + "Expected {} dropped elements, but found {}", + self.created(), + self.dropped() + ); + } +} + +impl<'a> Drop for Element<'a> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } +} + +/// Assert that the result from catch_unwind is a panic that contains expected message +fn assert_is_panic_with_message(result: &ThreadResult, expected: &str) +where + T: fmt::Debug, +{ + match result { + Ok(value) => { + panic!( + "assertion failure: Expected panic, got successful {:?}", + value + ); + } + Err(error) => { + let message_str = error.downcast_ref::<&'static str>().cloned(); + let message_string = error.downcast_ref::().map(String::as_str); + if let Some(message) = message_str.or(message_string) { + if !message.contains(expected) { + panic!( + "assertion 
failure: Expected {:?}, but found panic with {:?}", + expected, message + ); + } + // assertion passes + } else { + panic!( + "assertion failure: Expected {:?}, but found panic with unknown value", + expected + ); + } + } + } +} diff --git a/src/iter/copied.rs b/src/iter/copied.rs new file mode 100644 index 0000000..12c9c5b --- /dev/null +++ b/src/iter/copied.rs @@ -0,0 +1,223 @@ +use super::plumbing::*; +use super::*; + +use std::iter; + +/// `Copied` is an iterator that copies the elements of an underlying iterator. +/// +/// This struct is created by the [`copied()`] method on [`ParallelIterator`] +/// +/// [`copied()`]: trait.ParallelIterator.html#method.copied +/// [`ParallelIterator`]: trait.ParallelIterator.html +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +#[derive(Debug, Clone)] +pub struct Copied { + base: I, +} + +impl Copied +where + I: ParallelIterator, +{ + /// Creates a new `Copied` iterator. + pub(super) fn new(base: I) -> Self { + Copied { base } + } +} + +impl<'a, T, I> ParallelIterator for Copied +where + I: ParallelIterator, + T: 'a + Copy + Send + Sync, +{ + type Item = T; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + let consumer1 = CopiedConsumer::new(consumer); + self.base.drive_unindexed(consumer1) + } + + fn opt_len(&self) -> Option { + self.base.opt_len() + } +} + +impl<'a, T, I> IndexedParallelIterator for Copied +where + I: IndexedParallelIterator, + T: 'a + Copy + Send + Sync, +{ + fn drive(self, consumer: C) -> C::Result + where + C: Consumer, + { + let consumer1 = CopiedConsumer::new(consumer); + self.base.drive(consumer1) + } + + fn len(&self) -> usize { + self.base.len() + } + + fn with_producer(self, callback: CB) -> CB::Output + where + CB: ProducerCallback, + { + return self.base.with_producer(Callback { callback }); + + struct Callback { + callback: CB, + } + + impl<'a, T, CB> ProducerCallback<&'a T> for Callback + where + CB: ProducerCallback, + T: 'a 
+ Copy + Send, + { + type Output = CB::Output; + + fn callback