1 files changed, 140 insertions, 0 deletions
diff --git a/src/stats/univariate/kde/mod.rs b/src/stats/univariate/kde/mod.rs
new file mode 100755
index 0000000..efc27cd
--- /dev/null
+++ b/src/stats/univariate/kde/mod.rs
@@ -0,0 +1,140 @@
+//! Kernel density estimation
+
+pub mod kernel;
+
+use self::kernel::Kernel;
+use crate::stats::float::Float;
+use crate::stats::univariate::Sample;
+use rayon::prelude::*;
+
+/// Univariate kernel density estimator
+pub struct Kde<'a, A, K>
+where
+    A: Float,
+    K: Kernel<A>,
+{
+    bandwidth: A,
+    kernel: K,
+    sample: &'a Sample<A>,
+}
+
+impl<'a, A, K> Kde<'a, A, K>
+where
+    A: 'a + Float,
+    K: Kernel<A>,
+{
+    /// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
+    /// the bandwidth using the method `bw`
+    pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
+        Kde {
+            bandwidth: bw.estimate(sample),
+            kernel,
+            sample,
+        }
+    }
+
+    /// Returns the bandwidth used by the estimator
+    pub fn bandwidth(&self) -> A {
+        self.bandwidth
+    }
+
+    /// Maps the KDE over `xs`
+    ///
+    /// - Multihreaded
+    pub fn map(&self, xs: &[A]) -> Box<[A]> {
+        xs.par_iter()
+            .map(|&x| self.estimate(x))
+            .collect::<Vec<_>>()
+            .into_boxed_slice()
+    }
+
+    /// Estimates the probability density of `x`
+    pub fn estimate(&self, x: A) -> A {
+        let _0 = A::cast(0);
+        let slice = self.sample;
+        let h = self.bandwidth;
+        let n = A::cast(slice.len());
+
+        let sum = slice
+            .iter()
+            .fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));
+
+        sum / (h * n)
+    }
+}
+
+/// Method to estimate the bandwidth
+pub enum Bandwidth {
+    /// Use Silverman's rule of thumb to estimate the bandwidth from the sample
+    Silverman,
+}
+
+impl Bandwidth {
+    fn estimate<A: Float>(self, sample: &Sample<A>) -> A {
+        match self {
+            Bandwidth::Silverman => {
+                let factor = A::cast(4. / 3.);
+                let exponent = A::cast(1. / 5.);
+                let n = A::cast(sample.len());
+                let sigma = sample.std_dev(None);
+
+                sigma * (factor / n).powf(exponent)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+macro_rules! test {
+    ($ty:ident) => {
+        mod $ty {
+            use approx::relative_eq;
+            use quickcheck::quickcheck;
+            use quickcheck::TestResult;
+
+            use crate::stats::univariate::kde::kernel::Gaussian;
+            use crate::stats::univariate::kde::{Bandwidth, Kde};
+            use crate::stats::univariate::Sample;
+
+            // The [-inf inf] integral of the estimated PDF should be one
+            quickcheck! {
+                fn integral(size: usize, start: usize) -> TestResult {
+                    const DX: $ty = 1e-3;
+
+                    if let Some(v) = crate::stats::test::vec::<$ty>(size, start) {
+                        let slice = &v[start..];
+                        let data = Sample::new(slice);
+                        let kde = Kde::new(data, Gaussian, Bandwidth::Silverman);
+                        let h = kde.bandwidth();
+                        // NB Obviously a [-inf inf] integral is not feasible, but this range works
+                        // quite well
+                        let (a, b) = (data.min() - 5. * h, data.max() + 5. * h);
+
+                        let mut acc = 0.;
+                        let mut x = a;
+                        let mut y = kde.estimate(a);
+
+                        while x < b {
+                            acc += DX * y / 2.;
+
+                            x += DX;
+                            y = kde.estimate(x);
+
+                            acc += DX * y / 2.;
+                        }
+
+                        TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5))
+                    } else {
+                        TestResult::discard()
+                    }
+                }
+            }
+        }
+    };
+}
+
+#[cfg(test)]
+mod test {
+    test!(f32);
+    test!(f64);
+}