summaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/math3/stat/correlation
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/apache/commons/math3/stat/correlation')
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java295
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/KendallsCorrelation.java272
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java330
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java262
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java138
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java229
-rw-r--r--src/main/java/org/apache/commons/math3/stat/correlation/package-info.java22
7 files changed, 1548 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java b/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java
new file mode 100644
index 0000000..c462401
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.correlation;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NotStrictlyPositiveException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.stat.descriptive.moment.Mean;
+import org.apache.commons.math3.stat.descriptive.moment.Variance;
+
+/**
+ * Computes covariances for pairs of arrays or columns of a matrix.
+ *
+ * <p>The constructors that take <code>RealMatrix</code> or
+ * <code>double[][]</code> arguments generate covariance matrices. The
+ * columns of the input matrices are assumed to represent variable values.</p>
+ *
+ * <p>The constructor argument <code>biasCorrected</code> determines whether or
+ * not computed covariances are bias-corrected.</p>
+ *
+ * <p>Unbiased covariances are given by the formula</p>
+ * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
+ * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
+ * is the mean of the <code>Y</code> values.
+ *
+ * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
+ *
+ * @since 2.0
+ */
+public class Covariance {
+
+ /** covariance matrix */
+ private final RealMatrix covarianceMatrix;
+
+ /**
+ * Create an empty covariance matrix.
+ */
+ /** Number of observations (length of covariate vectors) */
+ private final int n;
+
+ /**
+ * Create a Covariance with no data
+ */
+ public Covariance() {
+ super();
+ covarianceMatrix = null;
+ n = 0;
+ }
+
+ /**
+ * Create a Covariance matrix from a rectangular array
+ * whose columns represent covariates.
+ *
+ * <p>The <code>biasCorrected</code> parameter determines whether or not
+ * covariance estimates are bias-corrected.</p>
+ *
+ * <p>The input array must be rectangular with at least one column
+ * and two rows.</p>
+ *
+ * @param data rectangular array with columns representing covariates
+ * @param biasCorrected true means covariances are bias-corrected
+ * @throws MathIllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and one column.
+ * @throws NotStrictlyPositiveException if the input data array is not
+ * rectangular with at least one row and one column.
+ */
+ public Covariance(double[][] data, boolean biasCorrected)
+ throws MathIllegalArgumentException, NotStrictlyPositiveException {
+ this(new BlockRealMatrix(data), biasCorrected);
+ }
+
+ /**
+ * Create a Covariance matrix from a rectangular array
+ * whose columns represent covariates.
+ *
+ * <p>The input array must be rectangular with at least one column
+ * and two rows</p>
+ *
+ * @param data rectangular array with columns representing covariates
+ * @throws MathIllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and one column.
+ * @throws NotStrictlyPositiveException if the input data array is not
+ * rectangular with at least one row and one column.
+ */
+ public Covariance(double[][] data)
+ throws MathIllegalArgumentException, NotStrictlyPositiveException {
+ this(data, true);
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns
+ * represent covariates.
+ *
+ * <p>The <code>biasCorrected</code> parameter determines whether or not
+ * covariance estimates are bias-corrected.</p>
+ *
+ * <p>The matrix must have at least one column and two rows</p>
+ *
+ * @param matrix matrix with columns representing covariates
+ * @param biasCorrected true means covariances are bias-corrected
+ * @throws MathIllegalArgumentException if the input matrix does not have
+ * at least two rows and one column
+ */
+ public Covariance(RealMatrix matrix, boolean biasCorrected)
+ throws MathIllegalArgumentException {
+ checkSufficientData(matrix);
+ n = matrix.getRowDimension();
+ covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns
+ * represent covariates.
+ *
+ * <p>The matrix must have at least one column and two rows</p>
+ *
+ * @param matrix matrix with columns representing covariates
+ * @throws MathIllegalArgumentException if the input matrix does not have
+ * at least two rows and one column
+ */
+ public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
+ this(matrix, true);
+ }
+
+ /**
+ * Returns the covariance matrix
+ *
+ * @return covariance matrix
+ */
+ public RealMatrix getCovarianceMatrix() {
+ return covarianceMatrix;
+ }
+
+ /**
+ * Returns the number of observations (length of covariate vectors)
+ *
+ * @return number of observations
+ */
+ public int getN() {
+ return n;
+ }
+
+ /**
+ * Compute a covariance matrix from a matrix whose columns represent
+ * covariates.
+ * @param matrix input matrix (must have at least one column and two rows)
+ * @param biasCorrected determines whether or not covariance estimates are bias-corrected
+ * @return covariance matrix
+ * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+ */
+ protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
+ throws MathIllegalArgumentException {
+ int dimension = matrix.getColumnDimension();
+ Variance variance = new Variance(biasCorrected);
+ RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
+ for (int i = 0; i < dimension; i++) {
+ for (int j = 0; j < i; j++) {
+ double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
+ outMatrix.setEntry(i, j, cov);
+ outMatrix.setEntry(j, i, cov);
+ }
+ outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns represent
+ * covariates. Covariances are computed using the bias-corrected formula.
+ * @param matrix input matrix (must have at least one column and two rows)
+ * @return covariance matrix
+ * @throws MathIllegalArgumentException if matrix does not contain sufficient data
+ * @see #Covariance
+ */
+ protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
+ throws MathIllegalArgumentException {
+ return computeCovarianceMatrix(matrix, true);
+ }
+
+ /**
+ * Compute a covariance matrix from a rectangular array whose columns represent
+ * covariates.
+ * @param data input array (must have at least one column and two rows)
+ * @param biasCorrected determines whether or not covariance estimates are bias-corrected
+ * @return covariance matrix
+ * @throws MathIllegalArgumentException if the data array does not contain sufficient
+ * data
+ * @throws NotStrictlyPositiveException if the input data array is not
+ * rectangular with at least one row and one column.
+ */
+ protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
+ throws MathIllegalArgumentException, NotStrictlyPositiveException {
+ return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
+ }
+
+ /**
+ * Create a covariance matrix from a rectangular array whose columns represent
+ * covariates. Covariances are computed using the bias-corrected formula.
+ * @param data input array (must have at least one column and two rows)
+ * @return covariance matrix
+ * @throws MathIllegalArgumentException if the data array does not contain sufficient data
+ * @throws NotStrictlyPositiveException if the input data array is not
+ * rectangular with at least one row and one column.
+ * @see #Covariance
+ */
+ protected RealMatrix computeCovarianceMatrix(double[][] data)
+ throws MathIllegalArgumentException, NotStrictlyPositiveException {
+ return computeCovarianceMatrix(data, true);
+ }
+
+ /**
+ * Computes the covariance between the two arrays.
+ *
+ * <p>Array lengths must match and the common length must be at least 2.</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @param biasCorrected if true, returned value will be bias-corrected
+ * @return returns the covariance for the two arrays
+ * @throws MathIllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
+ throws MathIllegalArgumentException {
+ Mean mean = new Mean();
+ double result = 0d;
+ int length = xArray.length;
+ if (length != yArray.length) {
+ throw new MathIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
+ } else if (length < 2) {
+ throw new MathIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
+ } else {
+ double xMean = mean.evaluate(xArray);
+ double yMean = mean.evaluate(yArray);
+ for (int i = 0; i < length; i++) {
+ double xDev = xArray[i] - xMean;
+ double yDev = yArray[i] - yMean;
+ result += (xDev * yDev - result) / (i + 1);
+ }
+ }
+ return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
+ }
+
+ /**
+ * Computes the covariance between the two arrays, using the bias-corrected
+ * formula.
+ *
+ * <p>Array lengths must match and the common length must be at least 2.</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return returns the covariance for the two arrays
+ * @throws MathIllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double covariance(final double[] xArray, final double[] yArray)
+ throws MathIllegalArgumentException {
+ return covariance(xArray, yArray, true);
+ }
+
+ /**
+ * Throws MathIllegalArgumentException if the matrix does not have at least
+ * one column and two rows.
+ * @param matrix matrix to check
+ * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+ * to compute covariance
+ */
+ private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
+ int nRows = matrix.getRowDimension();
+ int nCols = matrix.getColumnDimension();
+ if (nRows < 2 || nCols < 1) {
+ throw new MathIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
+ nRows, nCols);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/KendallsCorrelation.java b/src/main/java/org/apache/commons/math3/stat/correlation/KendallsCorrelation.java
new file mode 100644
index 0000000..d38cf71
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/KendallsCorrelation.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.correlation;
+
+import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.util.FastMath;
+import org.apache.commons.math3.util.Pair;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+/**
+ * Implementation of Kendall's Tau-b rank correlation</a>.
+ * <p>
+ * A pair of observations (x<sub>1</sub>, y<sub>1</sub>) and
+ * (x<sub>2</sub>, y<sub>2</sub>) are considered <i>concordant</i> if
+ * x<sub>1</sub> &lt; x<sub>2</sub> and y<sub>1</sub> &lt; y<sub>2</sub>
+ * or x<sub>2</sub> &lt; x<sub>1</sub> and y<sub>2</sub> &lt; y<sub>1</sub>.
+ * The pair is <i>discordant</i> if x<sub>1</sub> &lt; x<sub>2</sub> and
+ * y<sub>2</sub> &lt; y<sub>1</sub> or x<sub>2</sub> &lt; x<sub>1</sub> and
+ * y<sub>1</sub> &lt; y<sub>2</sub>. If either x<sub>1</sub> = x<sub>2</sub>
+ * or y<sub>1</sub> = y<sub>2</sub>, the pair is neither concordant nor
+ * discordant.
+ * <p>
+ * Kendall's Tau-b is defined as:
+ * <pre>
+ * tau<sub>b</sub> = (n<sub>c</sub> - n<sub>d</sub>) / sqrt((n<sub>0</sub> - n<sub>1</sub>) * (n<sub>0</sub> - n<sub>2</sub>))
+ * </pre>
+ * <p>
+ * where:
+ * <ul>
+ * <li>n<sub>0</sub> = n * (n - 1) / 2</li>
+ * <li>n<sub>c</sub> = Number of concordant pairs</li>
+ * <li>n<sub>d</sub> = Number of discordant pairs</li>
+ * <li>n<sub>1</sub> = sum of t<sub>i</sub> * (t<sub>i</sub> - 1) / 2 for all i</li>
+ * <li>n<sub>2</sub> = sum of u<sub>j</sub> * (u<sub>j</sub> - 1) / 2 for all j</li>
+ * <li>t<sub>i</sub> = Number of tied values in the i<sup>th</sup> group of ties in x</li>
+ * <li>u<sub>j</sub> = Number of tied values in the j<sup>th</sup> group of ties in y</li>
+ * </ul>
+ * <p>
+ * This implementation uses the O(n log n) algorithm described in
+ * William R. Knight's 1966 paper "A Computer Method for Calculating
+ * Kendall's Tau with Ungrouped Data" in the Journal of the American
+ * Statistical Association.
+ *
+ * @see <a href="http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient">
+ * Kendall tau rank correlation coefficient (Wikipedia)</a>
+ * @see <a href="http://www.jstor.org/stable/2282833">A Computer
+ * Method for Calculating Kendall's Tau with Ungrouped Data</a>
+ *
+ * @since 3.3
+ */
+public class KendallsCorrelation {
+
+ /** correlation matrix */
+ private final RealMatrix correlationMatrix;
+
+ /**
+ * Create a KendallsCorrelation instance without data.
+ */
+ public KendallsCorrelation() {
+ correlationMatrix = null;
+ }
+
+ /**
+ * Create a KendallsCorrelation from a rectangular array
+ * whose columns represent values of variables to be correlated.
+ *
+ * @param data rectangular array with columns representing variables
+ * @throws IllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and two columns.
+ */
+ public KendallsCorrelation(double[][] data) {
+ this(MatrixUtils.createRealMatrix(data));
+ }
+
+ /**
+ * Create a KendallsCorrelation from a RealMatrix whose columns
+ * represent variables to be correlated.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ */
+ public KendallsCorrelation(RealMatrix matrix) {
+ correlationMatrix = computeCorrelationMatrix(matrix);
+ }
+
+ /**
+ * Returns the correlation matrix.
+ *
+ * @return correlation matrix
+ */
+ public RealMatrix getCorrelationMatrix() {
+ return correlationMatrix;
+ }
+
+ /**
+ * Computes the Kendall's Tau rank correlation matrix for the columns of
+ * the input matrix.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
+ int nVars = matrix.getColumnDimension();
+ RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < i; j++) {
+ double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
+ outMatrix.setEntry(i, j, corr);
+ outMatrix.setEntry(j, i, corr);
+ }
+ outMatrix.setEntry(i, i, 1d);
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Computes the Kendall's Tau rank correlation matrix for the columns of
+ * the input rectangular array. The columns of the array represent values
+ * of variables to be correlated.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
+ return computeCorrelationMatrix(new BlockRealMatrix(matrix));
+ }
+
+ /**
+ * Computes the Kendall's Tau rank correlation coefficient between the two arrays.
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return Returns Kendall's Tau rank correlation coefficient for the two arrays
+ * @throws DimensionMismatchException if the arrays lengths do not match
+ */
+ public double correlation(final double[] xArray, final double[] yArray)
+ throws DimensionMismatchException {
+
+ if (xArray.length != yArray.length) {
+ throw new DimensionMismatchException(xArray.length, yArray.length);
+ }
+
+ final int n = xArray.length;
+ final long numPairs = sum(n - 1);
+
+ @SuppressWarnings("unchecked")
+ Pair<Double, Double>[] pairs = new Pair[n];
+ for (int i = 0; i < n; i++) {
+ pairs[i] = new Pair<Double, Double>(xArray[i], yArray[i]);
+ }
+
+ Arrays.sort(pairs, new Comparator<Pair<Double, Double>>() {
+ /** {@inheritDoc} */
+ public int compare(Pair<Double, Double> pair1, Pair<Double, Double> pair2) {
+ int compareFirst = pair1.getFirst().compareTo(pair2.getFirst());
+ return compareFirst != 0 ? compareFirst : pair1.getSecond().compareTo(pair2.getSecond());
+ }
+ });
+
+ long tiedXPairs = 0;
+ long tiedXYPairs = 0;
+ long consecutiveXTies = 1;
+ long consecutiveXYTies = 1;
+ Pair<Double, Double> prev = pairs[0];
+ for (int i = 1; i < n; i++) {
+ final Pair<Double, Double> curr = pairs[i];
+ if (curr.getFirst().equals(prev.getFirst())) {
+ consecutiveXTies++;
+ if (curr.getSecond().equals(prev.getSecond())) {
+ consecutiveXYTies++;
+ } else {
+ tiedXYPairs += sum(consecutiveXYTies - 1);
+ consecutiveXYTies = 1;
+ }
+ } else {
+ tiedXPairs += sum(consecutiveXTies - 1);
+ consecutiveXTies = 1;
+ tiedXYPairs += sum(consecutiveXYTies - 1);
+ consecutiveXYTies = 1;
+ }
+ prev = curr;
+ }
+ tiedXPairs += sum(consecutiveXTies - 1);
+ tiedXYPairs += sum(consecutiveXYTies - 1);
+
+ long swaps = 0;
+ @SuppressWarnings("unchecked")
+ Pair<Double, Double>[] pairsDestination = new Pair[n];
+ for (int segmentSize = 1; segmentSize < n; segmentSize <<= 1) {
+ for (int offset = 0; offset < n; offset += 2 * segmentSize) {
+ int i = offset;
+ final int iEnd = FastMath.min(i + segmentSize, n);
+ int j = iEnd;
+ final int jEnd = FastMath.min(j + segmentSize, n);
+
+ int copyLocation = offset;
+ while (i < iEnd || j < jEnd) {
+ if (i < iEnd) {
+ if (j < jEnd) {
+ if (pairs[i].getSecond().compareTo(pairs[j].getSecond()) <= 0) {
+ pairsDestination[copyLocation] = pairs[i];
+ i++;
+ } else {
+ pairsDestination[copyLocation] = pairs[j];
+ j++;
+ swaps += iEnd - i;
+ }
+ } else {
+ pairsDestination[copyLocation] = pairs[i];
+ i++;
+ }
+ } else {
+ pairsDestination[copyLocation] = pairs[j];
+ j++;
+ }
+ copyLocation++;
+ }
+ }
+ final Pair<Double, Double>[] pairsTemp = pairs;
+ pairs = pairsDestination;
+ pairsDestination = pairsTemp;
+ }
+
+ long tiedYPairs = 0;
+ long consecutiveYTies = 1;
+ prev = pairs[0];
+ for (int i = 1; i < n; i++) {
+ final Pair<Double, Double> curr = pairs[i];
+ if (curr.getSecond().equals(prev.getSecond())) {
+ consecutiveYTies++;
+ } else {
+ tiedYPairs += sum(consecutiveYTies - 1);
+ consecutiveYTies = 1;
+ }
+ prev = curr;
+ }
+ tiedYPairs += sum(consecutiveYTies - 1);
+
+ final long concordantMinusDiscordant = numPairs - tiedXPairs - tiedYPairs + tiedXYPairs - 2 * swaps;
+ final double nonTiedPairsMultiplied = (numPairs - tiedXPairs) * (double) (numPairs - tiedYPairs);
+ return concordantMinusDiscordant / FastMath.sqrt(nonTiedPairsMultiplied);
+ }
+
+ /**
+ * Returns the sum of the number from 1 .. n according to Gauss' summation formula:
+ * \[ \sum\limits_{k=1}^n k = \frac{n(n + 1)}{2} \]
+ *
+ * @param n the summation end
+ * @return the sum of the number from 1 to n
+ */
+ private static long sum(long n) {
+ return n * (n + 1) / 2l;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java b/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java
new file mode 100644
index 0000000..53d17ab
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/PearsonsCorrelation.java
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.correlation;
+
+import org.apache.commons.math3.distribution.TDistribution;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NullArgumentException;
+import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.stat.regression.SimpleRegression;
+import org.apache.commons.math3.util.FastMath;
+
+/**
+ * Computes Pearson's product-moment correlation coefficients for pairs of arrays
+ * or columns of a matrix.
+ *
+ * <p>The constructors that take <code>RealMatrix</code> or
+ * <code>double[][]</code> arguments generate correlation matrices. The
+ * columns of the input matrices are assumed to represent variable values.
+ * Correlations are given by the formula</p>
+ *
+ * <p><code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
+ * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
+ * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.</p>
+ *
+ * <p>To compute the correlation coefficient for a single pair of arrays, use {@link #PearsonsCorrelation()}
+ * to construct an instance with no data and then {@link #correlation(double[], double[])}.
+ * Correlation matrices can also be computed directly from an instance with no data using
+ * {@link #computeCorrelationMatrix(double[][])}. In order to use {@link #getCorrelationMatrix()},
+ * {@link #getCorrelationPValues()}, or {@link #getCorrelationStandardErrors()}; however, one of the
+ * constructors supplying data or a covariance matrix must be used to create the instance.</p>
+ *
+ * @since 2.0
+ */
+public class PearsonsCorrelation {
+
+ /** correlation matrix */
+ private final RealMatrix correlationMatrix;
+
+ /** number of observations */
+ private final int nObs;
+
+ /**
+ * Create a PearsonsCorrelation instance without data.
+ */
+ public PearsonsCorrelation() {
+ super();
+ correlationMatrix = null;
+ nObs = 0;
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a rectangular array
+ * whose columns represent values of variables to be correlated.
+ *
+ * Throws MathIllegalArgumentException if the input array does not have at least
+ * two columns and two rows. Pairwise correlations are set to NaN if one
+ * of the correlates has zero variance.
+ *
+ * @param data rectangular array with columns representing variables
+ * @throws MathIllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and two columns.
+ * @see #correlation(double[], double[])
+ */
+ public PearsonsCorrelation(double[][] data) {
+ this(new BlockRealMatrix(data));
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a RealMatrix whose columns
+ * represent variables to be correlated.
+ *
+ * Throws MathIllegalArgumentException if the matrix does not have at least
+ * two columns and two rows. Pairwise correlations are set to NaN if one
+ * of the correlates has zero variance.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+ * @see #correlation(double[], double[])
+ */
+ public PearsonsCorrelation(RealMatrix matrix) {
+ nObs = matrix.getRowDimension();
+ correlationMatrix = computeCorrelationMatrix(matrix);
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a {@link Covariance}. The correlation
+ * matrix is computed by scaling the Covariance's covariance matrix.
+ * The Covariance instance must have been created from a data matrix with
+ * columns representing variable values.
+ *
+ * @param covariance Covariance instance
+ */
+ public PearsonsCorrelation(Covariance covariance) {
+ RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
+ if (covarianceMatrix == null) {
+ throw new NullArgumentException(LocalizedFormats.COVARIANCE_MATRIX);
+ }
+ nObs = covariance.getN();
+ correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a covariance matrix. The correlation
+ * matrix is computed by scaling the covariance matrix.
+ *
+ * @param covarianceMatrix covariance matrix
+ * @param numberOfObservations the number of observations in the dataset used to compute
+ * the covariance matrix
+ */
+ public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
+ nObs = numberOfObservations;
+ correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+ }
+
+ /**
+ * Returns the correlation matrix.
+ *
+ * <p>This method will return null if the argumentless constructor was used
+ * to create this instance, even if {@link #computeCorrelationMatrix(double[][])}
+ * has been called before it is activated.</p>
+ *
+ * @return correlation matrix
+ */
+ public RealMatrix getCorrelationMatrix() {
+ return correlationMatrix;
+ }
+
+ /**
+ * Returns a matrix of standard errors associated with the estimates
+ * in the correlation matrix.<br/>
+ * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
+ * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
+ *
+ * <p>The formula used to compute the standard error is <br/>
+ * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
+ * where <code>r</code> is the estimated correlation coefficient and
+ * <code>n</code> is the number of observations in the source dataset.</p>
+ *
+ * <p>To use this method, one of the constructors that supply an input
+ * matrix must have been used to create this instance.</p>
+ *
+ * @return matrix of correlation standard errors
+ * @throws NullPointerException if this instance was created with no data
+ */
+ public RealMatrix getCorrelationStandardErrors() {
+ int nVars = correlationMatrix.getColumnDimension();
+ double[][] out = new double[nVars][nVars];
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < nVars; j++) {
+ double r = correlationMatrix.getEntry(i, j);
+ out[i][j] = FastMath.sqrt((1 - r * r) /(nObs - 2));
+ }
+ }
+ return new BlockRealMatrix(out);
+ }
+
+ /**
+ * Returns a matrix of p-values associated with the (two-sided) null
+ * hypothesis that the corresponding correlation coefficient is zero.
+ *
+ * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
+ * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
+ * a value with absolute value greater than or equal to <br>
+ * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
+ *
+ * <p>The values in the matrix are sometimes referred to as the
+ * <i>significance</i> of the corresponding correlation coefficients.</p>
+ *
+ * <p>To use this method, one of the constructors that supply an input
+ * matrix must have been used to create this instance.</p>
+ *
+ * @return matrix of p-values
+ * @throws org.apache.commons.math3.exception.MaxCountExceededException
+ * if an error occurs estimating probabilities
+ * @throws NullPointerException if this instance was created with no data
+ */
+ public RealMatrix getCorrelationPValues() {
+ TDistribution tDistribution = new TDistribution(nObs - 2);
+ int nVars = correlationMatrix.getColumnDimension();
+ double[][] out = new double[nVars][nVars];
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < nVars; j++) {
+ if (i == j) {
+ out[i][j] = 0d;
+ } else {
+ double r = correlationMatrix.getEntry(i, j);
+ double t = FastMath.abs(r * FastMath.sqrt((nObs - 2)/(1 - r * r)));
+ out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
+ }
+ }
+ }
+ return new BlockRealMatrix(out);
+ }
+
+
+ /**
+ * Computes the correlation matrix for the columns of the
+ * input matrix, using {@link #correlation(double[], double[])}.
+ *
+ * Throws MathIllegalArgumentException if the matrix does not have at least
+ * two columns and two rows. Pairwise correlations are set to NaN if one
+ * of the correlates has zero variance.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
+ * @see #correlation(double[], double[])
+ */
+ public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+ checkSufficientData(matrix);
+ int nVars = matrix.getColumnDimension();
+ RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < i; j++) {
+ double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
+ outMatrix.setEntry(i, j, corr);
+ outMatrix.setEntry(j, i, corr);
+ }
+ outMatrix.setEntry(i, i, 1d);
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Computes the correlation matrix for the columns of the
+ * input rectangular array. The columns of the array represent values
+ * of variables to be correlated.
+ *
+ * Throws MathIllegalArgumentException if the matrix does not have at least
+ * two columns and two rows or if the array is not rectangular. Pairwise
+ * correlations are set to NaN if one of the correlates has zero variance.
+ *
+ * @param data matrix with columns representing variables to correlate
+ * @return correlation matrix
+ * @throws MathIllegalArgumentException if the array does not contain sufficient data
+ * @see #correlation(double[], double[])
+ */
+ public RealMatrix computeCorrelationMatrix(double[][] data) {
+ return computeCorrelationMatrix(new BlockRealMatrix(data));
+ }
+
+ /**
+ * Computes the Pearson's product-moment correlation coefficient between two arrays.
+ *
+ * <p>Throws MathIllegalArgumentException if the arrays do not have the same length
+ * or their common length is less than 2. Returns {@code NaN} if either of the arrays
+ * has zero variance (i.e., if one of the arrays does not contain at least two distinct
+ * values).</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return Returns Pearson's correlation coefficient for the two arrays
+ * @throws DimensionMismatchException if the arrays lengths do not match
+ * @throws MathIllegalArgumentException if there is insufficient data
+ */
+ public double correlation(final double[] xArray, final double[] yArray) {
+ SimpleRegression regression = new SimpleRegression();
+ if (xArray.length != yArray.length) {
+ throw new DimensionMismatchException(xArray.length, yArray.length);
+ } else if (xArray.length < 2) {
+ throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
+ xArray.length, 2);
+ } else {
+ for(int i=0; i<xArray.length; i++) {
+ regression.addData(xArray[i], yArray[i]);
+ }
+ return regression.getR();
+ }
+ }
+
+ /**
+ * Derives a correlation matrix from a covariance matrix.
+ *
+ * <p>Uses the formula <br/>
+ * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
+ * <code>r(&middot,&middot;)</code> is the correlation coefficient and
+ * <code>s(&middot;)</code> means standard deviation.</p>
+ *
+ * @param covarianceMatrix the covariance matrix
+ * @return correlation matrix
+ */
+ public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
+ int nVars = covarianceMatrix.getColumnDimension();
+ RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
+ for (int i = 0; i < nVars; i++) {
+ double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i));
+ outMatrix.setEntry(i, i, 1d);
+ for (int j = 0; j < i; j++) {
+ double entry = covarianceMatrix.getEntry(i, j) /
+ (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j)));
+ outMatrix.setEntry(i, j, entry);
+ outMatrix.setEntry(j, i, entry);
+ }
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Throws MathIllegalArgumentException if the matrix does not have at least
+ * two columns and two rows.
+ *
+ * @param matrix matrix to check for sufficiency
+ * @throws MathIllegalArgumentException if there is insufficient data
+ */
+ private void checkSufficientData(final RealMatrix matrix) {
+ int nRows = matrix.getRowDimension();
+ int nCols = matrix.getColumnDimension();
+ if (nRows < 2 || nCols < 2) {
+ throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
+ nRows, nCols);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java b/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
new file mode 100644
index 0000000..80c0a54
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/SpearmansCorrelation.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.stat.correlation;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.linear.BlockRealMatrix;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
+import org.apache.commons.math3.stat.ranking.NaturalRanking;
+import org.apache.commons.math3.stat.ranking.RankingAlgorithm;
+
+/**
+ * Spearman's rank correlation. This implementation performs a rank
+ * transformation on the input data and then computes {@link PearsonsCorrelation}
+ * on the ranked data.
+ * <p>
+ * By default, ranks are computed using {@link NaturalRanking} with default
+ * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
+ * The ranking algorithm can be set using a constructor argument.
+ *
+ * @since 2.0
+ */
+public class SpearmansCorrelation {
+
+ /** Input data */
+ private final RealMatrix data;
+
+ /** Ranking algorithm */
+ private final RankingAlgorithm rankingAlgorithm;
+
+ /** Rank correlation */
+ private final PearsonsCorrelation rankCorrelation;
+
+ /**
+ * Create a SpearmansCorrelation without data.
+ */
+ public SpearmansCorrelation() {
+ this(new NaturalRanking());
+ }
+
+ /**
+ * Create a SpearmansCorrelation with the given ranking algorithm.
+ * <p>
+ * From version 4.0 onwards this constructor will throw an exception
+ * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
+ *
+ * @param rankingAlgorithm ranking algorithm
+ * @since 3.1
+ */
+ public SpearmansCorrelation(final RankingAlgorithm rankingAlgorithm) {
+ data = null;
+ this.rankingAlgorithm = rankingAlgorithm;
+ rankCorrelation = null;
+ }
+
+ /**
+ * Create a SpearmansCorrelation from the given data matrix.
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix) {
+ this(dataMatrix, new NaturalRanking());
+ }
+
+ /**
+ * Create a SpearmansCorrelation with the given input data matrix
+ * and ranking algorithm.
+ * <p>
+ * From version 4.0 onwards this constructor will throw an exception
+ * if the provided {@link NaturalRanking} uses a {@link NaNStrategy#REMOVED} strategy.
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ * @param rankingAlgorithm ranking algorithm
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
+ this.rankingAlgorithm = rankingAlgorithm;
+ this.data = rankTransform(dataMatrix);
+ rankCorrelation = new PearsonsCorrelation(data);
+ }
+
+ /**
+ * Calculate the Spearman Rank Correlation Matrix.
+ *
+ * @return Spearman Rank Correlation Matrix
+ * @throws NullPointerException if this instance was created with no data
+ */
+ public RealMatrix getCorrelationMatrix() {
+ return rankCorrelation.getCorrelationMatrix();
+ }
+
+ /**
+ * Returns a {@link PearsonsCorrelation} instance constructed from the
+ * ranked input data. That is,
+ * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
+ * is equivalent to
+ * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
+ * <code>rankTransform(matrix)</code> is the result of applying the
+ * configured <code>RankingAlgorithm</code> to each of the columns of
+ * <code>matrix.</code>
+ *
+ * <p>Returns null if this instance was created with no data.</p>
+ *
+ * @return PearsonsCorrelation among ranked column data
+ */
+ public PearsonsCorrelation getRankCorrelation() {
+ return rankCorrelation;
+ }
+
+ /**
+ * Computes the Spearman's rank correlation matrix for the columns of the
+ * input matrix.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
+ final RealMatrix matrixCopy = rankTransform(matrix);
+ return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
+ }
+
+ /**
+ * Computes the Spearman's rank correlation matrix for the columns of the
+ * input rectangular array. The columns of the array represent values
+ * of variables to be correlated.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(final double[][] matrix) {
+ return computeCorrelationMatrix(new BlockRealMatrix(matrix));
+ }
+
+ /**
+ * Computes the Spearman's rank correlation coefficient between the two arrays.
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return Returns Spearman's rank correlation coefficient for the two arrays
+ * @throws DimensionMismatchException if the arrays lengths do not match
+ * @throws MathIllegalArgumentException if the array length is less than 2
+ */
+ public double correlation(final double[] xArray, final double[] yArray) {
+ if (xArray.length != yArray.length) {
+ throw new DimensionMismatchException(xArray.length, yArray.length);
+ } else if (xArray.length < 2) {
+ throw new MathIllegalArgumentException(LocalizedFormats.INSUFFICIENT_DIMENSION,
+ xArray.length, 2);
+ } else {
+ double[] x = xArray;
+ double[] y = yArray;
+ if (rankingAlgorithm instanceof NaturalRanking &&
+ NaNStrategy.REMOVED == ((NaturalRanking) rankingAlgorithm).getNanStrategy()) {
+ final Set<Integer> nanPositions = new HashSet<Integer>();
+
+ nanPositions.addAll(getNaNPositions(xArray));
+ nanPositions.addAll(getNaNPositions(yArray));
+
+ x = removeValues(xArray, nanPositions);
+ y = removeValues(yArray, nanPositions);
+ }
+ return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(x), rankingAlgorithm.rank(y));
+ }
+ }
+
+ /**
+ * Applies rank transform to each of the columns of <code>matrix</code>
+ * using the current <code>rankingAlgorithm</code>.
+ *
+ * @param matrix matrix to transform
+ * @return a rank-transformed matrix
+ */
+ private RealMatrix rankTransform(final RealMatrix matrix) {
+ RealMatrix transformed = null;
+
+ if (rankingAlgorithm instanceof NaturalRanking &&
+ ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED) {
+ final Set<Integer> nanPositions = new HashSet<Integer>();
+ for (int i = 0; i < matrix.getColumnDimension(); i++) {
+ nanPositions.addAll(getNaNPositions(matrix.getColumn(i)));
+ }
+
+ // if we have found NaN values, we have to update the matrix size
+ if (!nanPositions.isEmpty()) {
+ transformed = new BlockRealMatrix(matrix.getRowDimension() - nanPositions.size(),
+ matrix.getColumnDimension());
+ for (int i = 0; i < transformed.getColumnDimension(); i++) {
+ transformed.setColumn(i, removeValues(matrix.getColumn(i), nanPositions));
+ }
+ }
+ }
+
+ if (transformed == null) {
+ transformed = matrix.copy();
+ }
+
+ for (int i = 0; i < transformed.getColumnDimension(); i++) {
+ transformed.setColumn(i, rankingAlgorithm.rank(transformed.getColumn(i)));
+ }
+
+ return transformed;
+ }
+
+ /**
+ * Returns a list containing the indices of NaN values in the input array.
+ *
+ * @param input the input array
+ * @return a list of NaN positions in the input array
+ */
+ private List<Integer> getNaNPositions(final double[] input) {
+ final List<Integer> positions = new ArrayList<Integer>();
+ for (int i = 0; i < input.length; i++) {
+ if (Double.isNaN(input[i])) {
+ positions.add(i);
+ }
+ }
+ return positions;
+ }
+
+ /**
+ * Removes all values from the input array at the specified indices.
+ *
+ * @param input the input array
+ * @param indices a set containing the indices to be removed
+ * @return the input array without the values at the specified indices
+ */
+ private double[] removeValues(final double[] input, final Set<Integer> indices) {
+ if (indices.isEmpty()) {
+ return input;
+ }
+ final double[] result = new double[input.length - indices.size()];
+ for (int i = 0, j = 0; i < input.length; i++) {
+ if (!indices.contains(i)) {
+ result[j++] = input[i];
+ }
+ }
+ return result;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
new file mode 100644
index 0000000..1a798d2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessBivariateCovariance.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.correlation;
+
+import org.apache.commons.math3.exception.NumberIsTooSmallException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+
+/**
+ * Bivariate Covariance implementation that does not require input data to be
+ * stored in memory.
+ *
+ * <p>This class is based on a paper written by Philippe P&eacute;bay:
+ * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
+ * Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
+ * Sandia National Laboratories. It computes the covariance for a pair of variables.
+ * Use {@link StorelessCovariance} to estimate an entire covariance matrix.</p>
+ *
+ * <p>Note: This class is package private as it is only used internally in
+ * the {@link StorelessCovariance} class.</p>
+ *
+ * @since 3.0
+ */
+class StorelessBivariateCovariance {
+
+ /** the mean of variable x */
+ private double meanX;
+
+ /** the mean of variable y */
+ private double meanY;
+
+ /** number of observations */
+ private double n;
+
+ /** the running covariance estimate */
+ private double covarianceNumerator;
+
+ /** flag for bias correction */
+ private boolean biasCorrected;
+
+ /**
+ * Create an empty {@link StorelessBivariateCovariance} instance with
+ * bias correction.
+ */
+ StorelessBivariateCovariance() {
+ this(true);
+ }
+
+ /**
+ * Create an empty {@link StorelessBivariateCovariance} instance.
+ *
+ * @param biasCorrection if <code>true</code> the covariance estimate is corrected
+ * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
+ * i.e. n in the denominator.
+ */
+ StorelessBivariateCovariance(final boolean biasCorrection) {
+ meanX = meanY = 0.0;
+ n = 0;
+ covarianceNumerator = 0.0;
+ biasCorrected = biasCorrection;
+ }
+
+ /**
+ * Update the covariance estimation with a pair of variables (x, y).
+ *
+ * @param x the x value
+ * @param y the y value
+ */
+ public void increment(final double x, final double y) {
+ n++;
+ final double deltaX = x - meanX;
+ final double deltaY = y - meanY;
+ meanX += deltaX / n;
+ meanY += deltaY / n;
+ covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
+ }
+
+ /**
+ * Appends another bivariate covariance calculation to this.
+ * After this operation, statistics returned should be close to what would
+ * have been obtained by by performing all of the {@link #increment(double, double)}
+ * operations in {@code cov} directly on this.
+ *
+ * @param cov StorelessBivariateCovariance instance to append.
+ */
+ public void append(StorelessBivariateCovariance cov) {
+ double oldN = n;
+ n += cov.n;
+ final double deltaX = cov.meanX - meanX;
+ final double deltaY = cov.meanY - meanY;
+ meanX += deltaX * cov.n / n;
+ meanY += deltaY * cov.n / n;
+ covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
+ }
+
+ /**
+ * Returns the number of observations.
+ *
+ * @return number of observations
+ */
+ public double getN() {
+ return n;
+ }
+
+ /**
+ * Return the current covariance estimate.
+ *
+ * @return the current covariance
+ * @throws NumberIsTooSmallException if the number of observations
+ * is &lt; 2
+ */
+ public double getResult() throws NumberIsTooSmallException {
+ if (n < 2) {
+ throw new NumberIsTooSmallException(LocalizedFormats.INSUFFICIENT_DIMENSION,
+ n, 2, true);
+ }
+ if (biasCorrected) {
+ return covarianceNumerator / (n - 1d);
+ } else {
+ return covarianceNumerator / n;
+ }
+ }
+}
+
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
new file mode 100644
index 0000000..7e927ca
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/StorelessCovariance.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.correlation;
+
+import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.exception.MathUnsupportedOperationException;
+import org.apache.commons.math3.exception.NumberIsTooSmallException;
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealMatrix;
+
+/**
+ * Covariance implementation that does not require input data to be
+ * stored in memory. The size of the covariance matrix is specified in the
+ * constructor. Specific elements of the matrix are incrementally updated with
+ * calls to incrementRow() or increment Covariance().
+ *
+ * <p>This class is based on a paper written by Philippe P&eacute;bay:
+ * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
+ * Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
+ * Sandia National Laboratories.</p>
+ *
+ * <p>Note: the underlying covariance matrix is symmetric, thus only the
+ * upper triangular part of the matrix is stored and updated each increment.</p>
+ *
+ * @since 3.0
+ */
+public class StorelessCovariance extends Covariance {
+
+ /** the square covariance matrix (upper triangular part) */
+ private StorelessBivariateCovariance[] covMatrix;
+
+ /** dimension of the square covariance matrix */
+ private int dimension;
+
+ /**
+ * Create a bias corrected covariance matrix with a given dimension.
+ *
+ * @param dim the dimension of the square covariance matrix
+ */
+ public StorelessCovariance(final int dim) {
+ this(dim, true);
+ }
+
+ /**
+ * Create a covariance matrix with a given number of rows and columns and the
+ * indicated bias correction.
+ *
+ * @param dim the dimension of the covariance matrix
+ * @param biasCorrected if <code>true</code> the covariance estimate is corrected
+ * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
+ * i.e. n in the denominator.
+ */
+ public StorelessCovariance(final int dim, final boolean biasCorrected) {
+ dimension = dim;
+ covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
+ initializeMatrix(biasCorrected);
+ }
+
+ /**
+ * Initialize the internal two-dimensional array of
+ * {@link StorelessBivariateCovariance} instances.
+ *
+ * @param biasCorrected if the covariance estimate shall be corrected for bias
+ */
+ private void initializeMatrix(final boolean biasCorrected) {
+ for(int i = 0; i < dimension; i++){
+ for(int j = 0; j < dimension; j++){
+ setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
+ }
+ }
+ }
+
+ /**
+ * Returns the index (i, j) translated into the one-dimensional
+ * array used to store the upper triangular part of the symmetric
+ * covariance matrix.
+ *
+ * @param i the row index
+ * @param j the column index
+ * @return the corresponding index in the matrix array
+ */
+ private int indexOf(final int i, final int j) {
+ return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
+ }
+
+ /**
+ * Gets the element at index (i, j) from the covariance matrix
+ * @param i the row index
+ * @param j the column index
+ * @return the {@link StorelessBivariateCovariance} element at the given index
+ */
+ private StorelessBivariateCovariance getElement(final int i, final int j) {
+ return covMatrix[indexOf(i, j)];
+ }
+
+ /**
+ * Sets the covariance element at index (i, j) in the covariance matrix
+ * @param i the row index
+ * @param j the column index
+ * @param cov the {@link StorelessBivariateCovariance} element to be set
+ */
+ private void setElement(final int i, final int j,
+ final StorelessBivariateCovariance cov) {
+ covMatrix[indexOf(i, j)] = cov;
+ }
+
+ /**
+ * Get the covariance for an individual element of the covariance matrix.
+ *
+ * @param xIndex row index in the covariance matrix
+ * @param yIndex column index in the covariance matrix
+ * @return the covariance of the given element
+ * @throws NumberIsTooSmallException if the number of observations
+ * in the cell is &lt; 2
+ */
+ public double getCovariance(final int xIndex,
+ final int yIndex)
+ throws NumberIsTooSmallException {
+
+ return getElement(xIndex, yIndex).getResult();
+
+ }
+
+ /**
+ * Increment the covariance matrix with one row of data.
+ *
+ * @param data array representing one row of data.
+ * @throws DimensionMismatchException if the length of <code>rowData</code>
+ * does not match with the covariance matrix
+ */
+ public void increment(final double[] data)
+ throws DimensionMismatchException {
+
+ int length = data.length;
+ if (length != dimension) {
+ throw new DimensionMismatchException(length, dimension);
+ }
+
+ // only update the upper triangular part of the covariance matrix
+ // as only these parts are actually stored
+ for (int i = 0; i < length; i++){
+ for (int j = i; j < length; j++){
+ getElement(i, j).increment(data[i], data[j]);
+ }
+ }
+
+ }
+
+ /**
+ * Appends {@code sc} to this, effectively aggregating the computations in {@code sc}
+ * with this. After invoking this method, covariances returned should be close
+ * to what would have been obtained by performing all of the {@link #increment(double[])}
+ * operations in {@code sc} directly on this.
+ *
+ * @param sc externally computed StorelessCovariance to add to this
+ * @throws DimensionMismatchException if the dimension of sc does not match this
+ * @since 3.3
+ */
+ public void append(StorelessCovariance sc) throws DimensionMismatchException {
+ if (sc.dimension != dimension) {
+ throw new DimensionMismatchException(sc.dimension, dimension);
+ }
+
+ // only update the upper triangular part of the covariance matrix
+ // as only these parts are actually stored
+ for (int i = 0; i < dimension; i++) {
+ for (int j = i; j < dimension; j++) {
+ getElement(i, j).append(sc.getElement(i, j));
+ }
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ * @throws NumberIsTooSmallException if the number of observations
+ * in a cell is &lt; 2
+ */
+ @Override
+ public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
+ return MatrixUtils.createRealMatrix(getData());
+ }
+
+ /**
+ * Return the covariance matrix as two-dimensional array.
+ *
+ * @return a two-dimensional double array of covariance values
+ * @throws NumberIsTooSmallException if the number of observations
+ * for a cell is &lt; 2
+ */
+ public double[][] getData() throws NumberIsTooSmallException {
+ final double[][] data = new double[dimension][dimension];
+ for (int i = 0; i < dimension; i++) {
+ for (int j = 0; j < dimension; j++) {
+ data[i][j] = getElement(i, j).getResult();
+ }
+ }
+ return data;
+ }
+
+ /**
+ * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
+ * since the number of bivariate observations does not have to be the same for different
+ * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
+ *
+ * @return nothing as this implementation always throws a
+ * {@link MathUnsupportedOperationException}
+ * @throws MathUnsupportedOperationException in all cases
+ */
+ @Override
+ public int getN()
+ throws MathUnsupportedOperationException {
+ throw new MathUnsupportedOperationException();
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/package-info.java b/src/main/java/org/apache/commons/math3/stat/correlation/package-info.java
new file mode 100644
index 0000000..adf285e
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/correlation/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *
+ * Correlations/Covariance computations.
+ *
+ */
+package org.apache.commons.math3.stat.correlation;