diff options
Diffstat (limited to 'src/slice/mergesort.rs')
-rw-r--r-- | src/slice/mergesort.rs | 68 |
1 files changed, 30 insertions, 38 deletions
diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs index e9a5d43..fec309d 100644 --- a/src/slice/mergesort.rs +++ b/src/slice/mergesort.rs @@ -6,6 +6,7 @@ use crate::iter::*; use crate::slice::ParallelSliceMut; +use crate::SendPtr; use std::mem; use std::mem::size_of; use std::ptr; @@ -24,7 +25,7 @@ unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T { /// When dropped, copies from `src` into `dest` a sequence of length `len`. struct CopyOnDrop<T> { - src: *mut T, + src: *const T, dest: *mut T, len: usize, } @@ -63,9 +64,7 @@ where // performance than with the 2nd method. // // All methods were benchmarked, and the 3rd showed best results. So we chose that one. - let mut tmp = NoDrop { - value: Some(ptr::read(&v[0])), - }; + let tmp = mem::ManuallyDrop::new(ptr::read(&v[0])); // Intermediate state of the insertion process is always tracked by `hole`, which // serves two purposes: @@ -78,13 +77,13 @@ where // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it // initially held exactly once. let mut hole = InsertionHole { - src: tmp.value.as_mut().unwrap(), + src: &*tmp, dest: &mut v[1], }; ptr::copy_nonoverlapping(&v[1], &mut v[0], 1); for i in 2..v.len() { - if !is_less(&v[i], tmp.value.as_ref().unwrap()) { + if !is_less(&v[i], &*tmp) { break; } ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1); @@ -94,20 +93,9 @@ where } } - // Holds a value, but never drops it. - struct NoDrop<T> { - value: Option<T>, - } - - impl<T> Drop for NoDrop<T> { - fn drop(&mut self) { - mem::forget(self.value.take()); - } - } - // When dropped, copies from `src` into `dest`. struct InsertionHole<T> { - src: *mut T, + src: *const T, dest: *mut T, } @@ -217,8 +205,8 @@ where impl<T> Drop for MergeHole<T> { fn drop(&mut self) { // `T` is not a zero-sized type, so it's okay to divide by its size. - let len = (self.end as usize - self.start as usize) / size_of::<T>(); unsafe { + let len = self.end.offset_from(self.start) as usize; ptr::copy_nonoverlapping(self.start, self.dest, len); } } @@ -284,7 +272,7 @@ fn collapse(runs: &[Run]) -> Option<usize> { /// Otherwise, it sorts the slice into non-descending order. /// /// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail -/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt). +/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt). /// /// The algorithm identifies strictly descending and non-descending subsequences, which are called /// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed @@ -294,7 +282,7 @@ fn collapse(runs: &[Run]) -> Option<usize> { /// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len` /// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len` /// -/// The invariants ensure that the total running time is `O(n log n)` worst-case. +/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case. /// /// # Safety /// @@ -497,12 +485,13 @@ where // get copied into `dest_left` and `dest_right``. mem::forget(s); - // Convert the pointers to `usize` because `*mut T` is not `Send`. - let dest_l = dest as usize; - let dest_r = dest.add(left_l.len() + right_l.len()) as usize; + // Wrap pointers in SendPtr so that they can be sent to another thread + // See the documentation of SendPtr for a full explanation + let dest_l = SendPtr(dest); + let dest_r = SendPtr(dest.add(left_l.len() + right_l.len())); rayon_core::join( - || par_merge(left_l, right_l, dest_l as *mut T, is_less), - || par_merge(left_r, right_r, dest_r as *mut T, is_less), + move || par_merge(left_l, right_l, dest_l.get(), is_less), + move || par_merge(left_r, right_r, dest_r.get(), is_less), ); } // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements @@ -580,7 +569,7 @@ unsafe fn recurse<T, F>( // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll - // merge chunks from `v` into `buf`, and viceversa. + // merge chunks from `v` into `buf`, and vice versa. // // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge` // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second @@ -598,12 +587,13 @@ unsafe fn recurse<T, F>( len: end - start, }; - // Convert the pointers to `usize` because `*mut T` is not `Send`. - let v = v as usize; - let buf = buf as usize; + // Wrap pointers in SendPtr so that they can be sent to another thread + // See the documentation of SendPtr for a full explanation + let v = SendPtr(v); + let buf = SendPtr(buf); rayon_core::join( - || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less), - || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less), + move || recurse(v.get(), buf.get(), left, !into_buf, is_less), + move || recurse(v.get(), buf.get(), right, !into_buf, is_less), ); // Everything went all right - recursive calls didn't panic. @@ -667,18 +657,20 @@ where // Split the slice into chunks and merge sort them in parallel. // However, descending chunks will not be sorted - they will be simply left intact. let mut iter = { - // Convert the pointer to `usize` because `*mut T` is not `Send`. - let buf = buf as usize; + // Wrap pointer in SendPtr so that it can be sent to another thread + // See the documentation of SendPtr for a full explanation + let buf = SendPtr(buf); + let is_less = &is_less; v.par_chunks_mut(CHUNK_LENGTH) .with_max_len(1) .enumerate() - .map(|(i, chunk)| { + .map(move |(i, chunk)| { let l = CHUNK_LENGTH * i; let r = l + chunk.len(); unsafe { - let buf = (buf as *mut T).add(l); - (l, r, mergesort(chunk, buf, &is_less)) + let buf = buf.get().add(l); + (l, r, mergesort(chunk, buf, is_less)) } }) .collect::<Vec<_>>() @@ -739,7 +731,7 @@ mod tests { check(&[1, 2, 2, 2, 2, 3], &[]); check(&[], &[1, 2, 2, 2, 2, 3]); - let ref mut rng = thread_rng(); + let rng = &mut thread_rng(); for _ in 0..100 { let limit: u32 = rng.gen_range(1..21); |