aboutsummaryrefslogtreecommitdiff
path: root/src/stats/univariate/resamples.rs
blob: 831bc7abd1380bbc3d4552b9f6efd34cd0bf51e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::mem;

use crate::stats::float::Float;
use crate::stats::rand_util::{new_rng, Rng};
use crate::stats::univariate::Sample;

pub struct Resamples<'a, A>
where
    A: Float,
{
    rng: Rng,
    sample: &'a [A],
    stage: Option<Vec<A>>,
}

#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
impl<'a, A> Resamples<'a, A>
where
    A: 'a + Float,
{
    pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> {
        let slice = sample;

        Resamples {
            rng: new_rng(),
            sample: slice,
            stage: None,
        }
    }

    pub fn next(&mut self) -> &Sample<A> {
        let n = self.sample.len();
        let rng = &mut self.rng;

        match self.stage {
            None => {
                let mut stage = Vec::with_capacity(n);

                for _ in 0..n {
                    let idx = rng.rand_range(0u64..(self.sample.len() as u64));
                    stage.push(self.sample[idx as usize])
                }

                self.stage = Some(stage);
            }
            Some(ref mut stage) => {
                for elem in stage.iter_mut() {
                    let idx = rng.rand_range(0u64..(self.sample.len() as u64));
                    *elem = self.sample[idx as usize]
                }
            }
        }

        if let Some(ref v) = self.stage {
            unsafe { mem::transmute::<&[_], _>(v) }
        } else {
            unreachable!();
        }
    }
}

#[cfg(test)]
mod test {
    use quickcheck::quickcheck;
    use quickcheck::TestResult;
    use std::collections::HashSet;

    use crate::stats::univariate::resamples::Resamples;
    use crate::stats::univariate::Sample;

    // Check that the resample is a subset of the sample
    quickcheck! {
        fn subset(size: usize, nresamples: usize) -> TestResult {
            if size > 1 {
                let v: Vec<_> = (0..size).map(|i| i as f32).collect();
                let sample = Sample::new(&v);
                let mut resamples = Resamples::new(sample);
                let sample = v.iter().map(|&x| x as i64).collect::<HashSet<_>>();

                TestResult::from_bool((0..nresamples).all(|_| {
                    let resample = resamples.next()

                        .iter()
                        .map(|&x| x as i64)
                        .collect::<HashSet<_>>();

                    resample.is_subset(&sample)
                }))
            } else {
                TestResult::discard()
            }
        }
    }

    #[test]
    fn different_subsets() {
        let size = 1000;
        let v: Vec<_> = (0..size).map(|i| i as f32).collect();
        let sample = Sample::new(&v);
        let mut resamples = Resamples::new(sample);

        // Hypothetically, we might see one duplicate, but more than one is likely to be a bug.
        let mut num_duplicated = 0;
        for _ in 0..1000 {
            let sample_1 = resamples.next().iter().cloned().collect::<Vec<_>>();
            let sample_2 = resamples.next().iter().cloned().collect::<Vec<_>>();

            if sample_1 == sample_2 {
                num_duplicated += 1;
            }
        }

        if num_duplicated > 1 {
            panic!("Found {} duplicate samples", num_duplicated);
        }
    }
}