diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java')
-rw-r--r-- | src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java new file mode 100644 index 0000000..1a798d2 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.correlation; + +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.exception.util.LocalizedFormats; + +/** + * Bivariate Covariance implementation that does not require input data to be + * stored in memory. + * + * <p>This class is based on a paper written by Philippe Pébay: + * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf"> + * Formulas for Robust, One-Pass Parallel Computation of Covariances and + * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212, + * Sandia National Laboratories. It computes the covariance for a pair of variables. + * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p> + * + * <p>Note: This class is package private as it is only used internally in + * the {@link StorelessCovariance} class.</p> + * + * @since 3.0 + */ +class StorelessBivariateCovariance { + + /** the mean of variable x */ + private double meanX; + + /** the mean of variable y */ + private double meanY; + + /** number of observations */ + private double n; + + /** the running covariance estimate */ + private double covarianceNumerator; + + /** flag for bias correction */ + private boolean biasCorrected; + + /** + * Create an empty {@link StorelessBivariateCovariance} instance with + * bias correction. + */ + StorelessBivariateCovariance() { + this(true); + } + + /** + * Create an empty {@link StorelessBivariateCovariance} instance. + * + * @param biasCorrection if <code>true</code> the covariance estimate is corrected + * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction, + * i.e. n in the denominator. + */ + StorelessBivariateCovariance(final boolean biasCorrection) { + meanX = meanY = 0.0; + n = 0; + covarianceNumerator = 0.0; + biasCorrected = biasCorrection; + } + + /** + * Update the covariance estimation with a pair of variables (x, y). + * + * @param x the x value + * @param y the y value + */ + public void increment(final double x, final double y) { + n++; + final double deltaX = x - meanX; + final double deltaY = y - meanY; + meanX += deltaX / n; + meanY += deltaY / n; + covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY; + } + + /** + * Appends another bivariate covariance calculation to this. + * After this operation, statistics returned should be close to what would + * have been obtained by by performing all of the {@link #increment(double, double)} + * operations in {@code cov} directly on this. + * + * @param cov StorelessBivariateCovariance instance to append. + */ + public void append(StorelessBivariateCovariance cov) { + double oldN = n; + n += cov.n; + final double deltaX = cov.meanX - meanX; + final double deltaY = cov.meanY - meanY; + meanX += deltaX * cov.n / n; + meanY += deltaY * cov.n / n; + covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY; + } + + /** + * Returns the number of observations. + * + * @return number of observations + */ + public double getN() { + return n; + } + + /** + * Return the current covariance estimate. + * + * @return the current covariance + * @throws NumberIsTooSmallException if the number of observations + * is < 2 + */ + public double getResult() throws NumberIsTooSmallException { + if (n < 2) { + throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION, + n, 2, true); + } + if (biasCorrected) { + return covarianceNumerator / (n - 1d); + } else { + return covarianceNumerator / n; + } + } +} + |