diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java')
-rw-r--r-- | src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java | 295 |
1 files changed, 295 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java b/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java new file mode 100644 index 0000000..c462401 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/correlation/Covariance.java @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.correlation; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.BlockRealMatrix; +import org.apache.commons.math3.stat.descriptive.moment.Mean; +import org.apache.commons.math3.stat.descriptive.moment.Variance; + +/** + * Computes covariances for pairs of arrays or columns of a matrix. + * + * <p>The constructors that take <code>RealMatrix</code> or + * <code>double[][]</code> arguments generate covariance matrices. The + * columns of the input matrices are assumed to represent variable values.</p> + * + * <p>The constructor argument <code>biasCorrected</code> determines whether or + * not computed covariances are bias-corrected.</p> + * + * <p>Unbiased covariances are given by the formula</p> + * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code> + * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code> + * is the mean of the <code>Y</code> values. + * + * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code> + * + * @since 2.0 + */ +public class Covariance { + + /** covariance matrix */ + private final RealMatrix covarianceMatrix; + + /** + * Create an empty covariance matrix. + */ + /** Number of observations (length of covariate vectors) */ + private final int n; + + /** + * Create a Covariance with no data + */ + public Covariance() { + super(); + covarianceMatrix = null; + n = 0; + } + + /** + * Create a Covariance matrix from a rectangular array + * whose columns represent covariates. + * + * <p>The <code>biasCorrected</code> parameter determines whether or not + * covariance estimates are bias-corrected.</p> + * + * <p>The input array must be rectangular with at least one column + * and two rows.</p> + * + * @param data rectangular array with columns representing covariates + * @param biasCorrected true means covariances are bias-corrected + * @throws MathIllegalArgumentException if the input data array is not + * rectangular with at least two rows and one column. + * @throws NotStrictlyPositiveException if the input data array is not + * rectangular with at least one row and one column. + */ + public Covariance(double[][] data, boolean biasCorrected) + throws MathIllegalArgumentException, NotStrictlyPositiveException { + this(new BlockRealMatrix(data), biasCorrected); + } + + /** + * Create a Covariance matrix from a rectangular array + * whose columns represent covariates. + * + * <p>The input array must be rectangular with at least one column + * and two rows</p> + * + * @param data rectangular array with columns representing covariates + * @throws MathIllegalArgumentException if the input data array is not + * rectangular with at least two rows and one column. + * @throws NotStrictlyPositiveException if the input data array is not + * rectangular with at least one row and one column. + */ + public Covariance(double[][] data) + throws MathIllegalArgumentException, NotStrictlyPositiveException { + this(data, true); + } + + /** + * Create a covariance matrix from a matrix whose columns + * represent covariates. + * + * <p>The <code>biasCorrected</code> parameter determines whether or not + * covariance estimates are bias-corrected.</p> + * + * <p>The matrix must have at least one column and two rows</p> + * + * @param matrix matrix with columns representing covariates + * @param biasCorrected true means covariances are bias-corrected + * @throws MathIllegalArgumentException if the input matrix does not have + * at least two rows and one column + */ + public Covariance(RealMatrix matrix, boolean biasCorrected) + throws MathIllegalArgumentException { + checkSufficientData(matrix); + n = matrix.getRowDimension(); + covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected); + } + + /** + * Create a covariance matrix from a matrix whose columns + * represent covariates. + * + * <p>The matrix must have at least one column and two rows</p> + * + * @param matrix matrix with columns representing covariates + * @throws MathIllegalArgumentException if the input matrix does not have + * at least two rows and one column + */ + public Covariance(RealMatrix matrix) throws MathIllegalArgumentException { + this(matrix, true); + } + + /** + * Returns the covariance matrix + * + * @return covariance matrix + */ + public RealMatrix getCovarianceMatrix() { + return covarianceMatrix; + } + + /** + * Returns the number of observations (length of covariate vectors) + * + * @return number of observations + */ + public int getN() { + return n; + } + + /** + * Compute a covariance matrix from a matrix whose columns represent + * covariates. + * @param matrix input matrix (must have at least one column and two rows) + * @param biasCorrected determines whether or not covariance estimates are bias-corrected + * @return covariance matrix + * @throws MathIllegalArgumentException if the matrix does not contain sufficient data + */ + protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) + throws MathIllegalArgumentException { + int dimension = matrix.getColumnDimension(); + Variance variance = new Variance(biasCorrected); + RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension); + for (int i = 0; i < dimension; i++) { + for (int j = 0; j < i; j++) { + double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected); + outMatrix.setEntry(i, j, cov); + outMatrix.setEntry(j, i, cov); + } + outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i))); + } + return outMatrix; + } + + /** + * Create a covariance matrix from a matrix whose columns represent + * covariates. Covariances are computed using the bias-corrected formula. + * @param matrix input matrix (must have at least one column and two rows) + * @return covariance matrix + * @throws MathIllegalArgumentException if matrix does not contain sufficient data + * @see #Covariance + */ + protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) + throws MathIllegalArgumentException { + return computeCovarianceMatrix(matrix, true); + } + + /** + * Compute a covariance matrix from a rectangular array whose columns represent + * covariates. + * @param data input array (must have at least one column and two rows) + * @param biasCorrected determines whether or not covariance estimates are bias-corrected + * @return covariance matrix + * @throws MathIllegalArgumentException if the data array does not contain sufficient + * data + * @throws NotStrictlyPositiveException if the input data array is not + * rectangular with at least one row and one column. + */ + protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) + throws MathIllegalArgumentException, NotStrictlyPositiveException { + return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected); + } + + /** + * Create a covariance matrix from a rectangular array whose columns represent + * covariates. Covariances are computed using the bias-corrected formula. + * @param data input array (must have at least one column and two rows) + * @return covariance matrix + * @throws MathIllegalArgumentException if the data array does not contain sufficient data + * @throws NotStrictlyPositiveException if the input data array is not + * rectangular with at least one row and one column. + * @see #Covariance + */ + protected RealMatrix computeCovarianceMatrix(double[][] data) + throws MathIllegalArgumentException, NotStrictlyPositiveException { + return computeCovarianceMatrix(data, true); + } + + /** + * Computes the covariance between the two arrays. + * + * <p>Array lengths must match and the common length must be at least 2.</p> + * + * @param xArray first data array + * @param yArray second data array + * @param biasCorrected if true, returned value will be bias-corrected + * @return returns the covariance for the two arrays + * @throws MathIllegalArgumentException if the arrays lengths do not match or + * there is insufficient data + */ + public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected) + throws MathIllegalArgumentException { + Mean mean = new Mean(); + double result = 0d; + int length = xArray.length; + if (length != yArray.length) { + throw new MathIllegalArgumentException( + LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length); + } else if (length < 2) { + throw new MathIllegalArgumentException( + LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2); + } else { + double xMean = mean.evaluate(xArray); + double yMean = mean.evaluate(yArray); + for (int i = 0; i < length; i++) { + double xDev = xArray[i] - xMean; + double yDev = yArray[i] - yMean; + result += (xDev * yDev - result) / (i + 1); + } + } + return biasCorrected ? result * ((double) length / (double)(length - 1)) : result; + } + + /** + * Computes the covariance between the two arrays, using the bias-corrected + * formula. + * + * <p>Array lengths must match and the common length must be at least 2.</p> + * + * @param xArray first data array + * @param yArray second data array + * @return returns the covariance for the two arrays + * @throws MathIllegalArgumentException if the arrays lengths do not match or + * there is insufficient data + */ + public double covariance(final double[] xArray, final double[] yArray) + throws MathIllegalArgumentException { + return covariance(xArray, yArray, true); + } + + /** + * Throws MathIllegalArgumentException if the matrix does not have at least + * one column and two rows. + * @param matrix matrix to check + * @throws MathIllegalArgumentException if the matrix does not contain sufficient data + * to compute covariance + */ + private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException { + int nRows = matrix.getRowDimension(); + int nCols = matrix.getColumnDimension(); + if (nRows < 2 || nCols < 1) { + throw new MathIllegalArgumentException( + LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, + nRows, nCols); + } + } +} |