1 files changed, 30 insertions, 38 deletions
diff --git a/src/slice/mergesort.rs b/src/slice/mergesort.rs
index e9a5d43..fec309d 100644
--- a/src/slice/mergesort.rs
+++ b/src/slice/mergesort.rs
@@ -6,6 +6,7 @@
 
 use crate::iter::*;
 use crate::slice::ParallelSliceMut;
+use crate::SendPtr;
 use std::mem;
 use std::mem::size_of;
 use std::ptr;
@@ -24,7 +25,7 @@ unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
 
 /// When dropped, copies from `src` into `dest` a sequence of length `len`.
 struct CopyOnDrop<T> {
-    src: *mut T,
+    src: *const T,
     dest: *mut T,
     len: usize,
 }
@@ -63,9 +64,7 @@ where
             //    performance than with the 2nd method.
             //
             // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
-            let mut tmp = NoDrop {
-                value: Some(ptr::read(&v[0])),
-            };
+            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
 
             // Intermediate state of the insertion process is always tracked by `hole`, which
             // serves two purposes:
@@ -78,13 +77,13 @@ where
             // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
             // initially held exactly once.
             let mut hole = InsertionHole {
-                src: tmp.value.as_mut().unwrap(),
+                src: &*tmp,
                 dest: &mut v[1],
             };
             ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
 
             for i in 2..v.len() {
-                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
+                if !is_less(&v[i], &*tmp) {
                     break;
                 }
                 ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
@@ -94,20 +93,9 @@ where
         }
     }
 
-    // Holds a value, but never drops it.
-    struct NoDrop<T> {
-        value: Option<T>,
-    }
-
-    impl<T> Drop for NoDrop<T> {
-        fn drop(&mut self) {
-            mem::forget(self.value.take());
-        }
-    }
-
     // When dropped, copies from `src` into `dest`.
     struct InsertionHole<T> {
-        src: *mut T,
+        src: *const T,
         dest: *mut T,
     }
 
@@ -217,8 +205,8 @@ where
     impl<T> Drop for MergeHole<T> {
         fn drop(&mut self) {
             // `T` is not a zero-sized type, so it's okay to divide by its size.
-            let len = (self.end as usize - self.start as usize) / size_of::<T>();
             unsafe {
+                let len = self.end.offset_from(self.start) as usize;
                 ptr::copy_nonoverlapping(self.start, self.dest, len);
             }
         }
@@ -284,7 +272,7 @@ fn collapse(runs: &[Run]) -> Option<usize> {
 /// Otherwise, it sorts the slice into non-descending order.
 ///
 /// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt).
+/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
 ///
 /// The algorithm identifies strictly descending and non-descending subsequences, which are called
 /// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
@@ -294,7 +282,7 @@ fn collapse(runs: &[Run]) -> Option<usize> {
 /// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
 /// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
 ///
-/// The invariants ensure that the total running time is `O(n log n)` worst-case.
+/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
 ///
 /// # Safety
 ///
@@ -497,12 +485,13 @@ where
         // get copied into `dest_left` and `dest_right``.
         mem::forget(s);
 
-        // Convert the pointers to `usize` because `*mut T` is not `Send`.
-        let dest_l = dest as usize;
-        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
+        // Wrap pointers in SendPtr so that they can be sent to another thread
+        // See the documentation of SendPtr for a full explanation
+        let dest_l = SendPtr(dest);
+        let dest_r = SendPtr(dest.add(left_l.len() + right_l.len()));
         rayon_core::join(
-            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
-            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
+            move || par_merge(left_l, right_l, dest_l.get(), is_less),
+            move || par_merge(left_r, right_r, dest_r.get(), is_less),
         );
     }
     // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
@@ -580,7 +569,7 @@ unsafe fn recurse<T, F>(
 
     // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
     // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
-    // merge chunks from `v` into `buf`, and viceversa.
+    // merge chunks from `v` into `buf`, and vice versa.
     //
     // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
     // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
@@ -598,12 +587,13 @@ unsafe fn recurse<T, F>(
         len: end - start,
     };
 
-    // Convert the pointers to `usize` because `*mut T` is not `Send`.
-    let v = v as usize;
-    let buf = buf as usize;
+    // Wrap pointers in SendPtr so that they can be sent to another thread
+    // See the documentation of SendPtr for a full explanation
+    let v = SendPtr(v);
+    let buf = SendPtr(buf);
     rayon_core::join(
-        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
-        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
+        move || recurse(v.get(), buf.get(), left, !into_buf, is_less),
+        move || recurse(v.get(), buf.get(), right, !into_buf, is_less),
     );
 
     // Everything went all right - recursive calls didn't panic.
@@ -667,18 +657,20 @@ where
     // Split the slice into chunks and merge sort them in parallel.
     // However, descending chunks will not be sorted - they will be simply left intact.
     let mut iter = {
-        // Convert the pointer to `usize` because `*mut T` is not `Send`.
-        let buf = buf as usize;
+        // Wrap pointer in SendPtr so that it can be sent to another thread
+        // See the documentation of SendPtr for a full explanation
+        let buf = SendPtr(buf);
+        let is_less = &is_less;
 
         v.par_chunks_mut(CHUNK_LENGTH)
             .with_max_len(1)
             .enumerate()
-            .map(|(i, chunk)| {
+            .map(move |(i, chunk)| {
                 let l = CHUNK_LENGTH * i;
                 let r = l + chunk.len();
                 unsafe {
-                    let buf = (buf as *mut T).add(l);
-                    (l, r, mergesort(chunk, buf, &is_less))
+                    let buf = buf.get().add(l);
+                    (l, r, mergesort(chunk, buf, is_less))
                 }
             })
             .collect::<Vec<_>>()
@@ -739,7 +731,7 @@ mod tests {
         check(&[1, 2, 2, 2, 2, 3], &[]);
         check(&[], &[1, 2, 2, 2, 2, 3]);
 
-        let ref mut rng = thread_rng();
+        let rng = &mut thread_rng();
 
         for _ in 0..100 {
             let limit: u32 = rng.gen_range(1..21);