diff options
Diffstat (limited to 'src/stats/bivariate/mod.rs')
-rwxr-xr-x | src/stats/bivariate/mod.rs | 131 |
1 files changed, 131 insertions, 0 deletions
diff --git a/src/stats/bivariate/mod.rs b/src/stats/bivariate/mod.rs new file mode 100755 index 0000000..b233b60 --- /dev/null +++ b/src/stats/bivariate/mod.rs @@ -0,0 +1,131 @@ +//! Bivariate analysis + +mod bootstrap; +pub mod regression; +mod resamples; + +use crate::stats::bivariate::resamples::Resamples; +use crate::stats::float::Float; +use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; +use crate::stats::univariate::Sample; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +/// Bivariate `(X, Y)` data +/// +/// Invariants: +/// +/// - No `NaN`s in the data +/// - At least two data points in the set +pub struct Data<'a, X, Y>(&'a [X], &'a [Y]); + +impl<'a, X, Y> Copy for Data<'a, X, Y> {} + +#[cfg_attr(feature = "cargo-clippy", allow(clippy::expl_impl_clone_on_copy))] +impl<'a, X, Y> Clone for Data<'a, X, Y> { + fn clone(&self) -> Data<'a, X, Y> { + *self + } +} + +impl<'a, X, Y> Data<'a, X, Y> { + /// Returns the length of the data set + pub fn len(&self) -> usize { + self.0.len() + } + + /// Iterate over the data set + pub fn iter(&self) -> Pairs<'a, X, Y> { + Pairs { + data: *self, + state: 0, + } + } +} + +impl<'a, X, Y> Data<'a, X, Y> +where + X: Float, + Y: Float, +{ + /// Creates a new data set from two existing slices + pub fn new(xs: &'a [X], ys: &'a [Y]) -> Data<'a, X, Y> { + assert!( + xs.len() == ys.len() + && xs.len() > 1 + && xs.iter().all(|x| !x.is_nan()) + && ys.iter().all(|y| !y.is_nan()) + ); + + Data(xs, ys) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the `statistic` + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(Data<X, Y>) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, + { + (0..nresamples) + .into_par_iter() + .map_init( + || Resamples::new(*self), + |resamples, _| statistic(resamples.next()), + ) + .fold( + || T::Builder::new(0), + |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }, + ) + .reduce( + || T::Builder::new(0), + |mut a, mut b| { + a.extend(&mut b); + a + }, + ) + .complete() + } + + /// Returns a view into the `X` data + pub fn x(&self) -> &'a Sample<X> { + Sample::new(&self.0) + } + + /// Returns a view into the `Y` data + pub fn y(&self) -> &'a Sample<Y> { + Sample::new(&self.1) + } +} + +/// Iterator over `Data` +pub struct Pairs<'a, X: 'a, Y: 'a> { + data: Data<'a, X, Y>, + state: usize, +} + +impl<'a, X, Y> Iterator for Pairs<'a, X, Y> { + type Item = (&'a X, &'a Y); + + fn next(&mut self) -> Option<(&'a X, &'a Y)> { + if self.state < self.data.len() { + let i = self.state; + self.state += 1; + + // This is safe because i will always be < self.data.{0,1}.len() + debug_assert!(i < self.data.0.len()); + debug_assert!(i < self.data.1.len()); + unsafe { Some((self.data.0.get_unchecked(i), self.data.1.get_unchecked(i))) } + } else { + None + } + } +} |