diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java')
-rw-r--r-- | src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java | 610 |
1 files changed, 610 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java new file mode 100644 index 0000000..6ce6835 --- /dev/null +++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math.exception.NullArgumentException; +import org.apache.commons.math.exception.util.LocalizedFormats; +import org.apache.commons.math.stat.descriptive.WeightedEvaluation; +import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; + +/** + * Computes the variance of the available values. By default, the unbiased + * "sample variance" definitional formula is used: + * <p> + * variance = sum((x_i - mean)^2) / (n - 1) </p> + * <p> + * where mean is the {@link Mean} and <code>n</code> is the number + * of sample observations.</p> + * <p> + * The definitional formula does not have good numerical properties, so + * this implementation does not compute the statistic using the definitional + * formula. <ul> + * <li> The <code>getResult</code> method computes the variance using + * updating formulas based on West's algorithm, as described in + * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and + * J. G. Lewis 1979, <i>Communications of the ACM</i>, + * vol. 22 no. 9, pp. 526-531.</a></li> + * <li> The <code>evaluate</code> methods leverage the fact that they have the + * full array of values in memory to execute a two-pass algorithm. + * Specifically, these methods use the "corrected two-pass algorithm" from + * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, + * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul> + * Note that adding values using <code>increment</code> or + * <code>incrementAll</code> and then executing <code>getResult</code> will + * sometimes give a different, less accurate, result than executing + * <code>evaluate</code> with the full array of values. The former approach + * should only be used when the full array of values is not available.</p> + * <p> + * The "population variance" ( sum((x_i - mean)^2) / n ) can also + * be computed using this statistic. The <code>isBiasCorrected</code> + * property determines whether the "population" or "sample" value is + * returned by the <code>evaluate</code> and <code>getResult</code> methods. + * To compute population variances, set this property to <code>false.</code> + * </p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $ + */ +public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation { + + /** Serializable version identifier */ + private static final long serialVersionUID = -9111962718267217978L; + + /** SecondMoment is used in incremental calculation of Variance*/ + protected SecondMoment moment = null; + + /** + * Boolean test to determine if this Variance should also increment + * the second moment, this evaluates to false when this Variance is + * constructed with an external SecondMoment as a parameter. + */ + protected boolean incMoment = true; + + /** + * Determines whether or not bias correction is applied when computing the + * value of the statisic. True means that bias is corrected. See + * {@link Variance} for details on the formula. + */ + private boolean isBiasCorrected = true; + + /** + * Constructs a Variance with default (true) <code>isBiasCorrected</code> + * property. + */ + public Variance() { + moment = new SecondMoment(); + } + + /** + * Constructs a Variance based on an external second moment. + * + * @param m2 the SecondMoment (Third or Fourth moments work + * here as well.) + */ + public Variance(final SecondMoment m2) { + incMoment = false; + this.moment = m2; + } + + /** + * Constructs a Variance with the specified <code>isBiasCorrected</code> + * property + * + * @param isBiasCorrected setting for bias correction - true means + * bias will be corrected and is equivalent to using the argumentless + * constructor + */ + public Variance(boolean isBiasCorrected) { + moment = new SecondMoment(); + this.isBiasCorrected = isBiasCorrected; + } + + /** + * Constructs a Variance with the specified <code>isBiasCorrected</code> + * property and the supplied external second moment. + * + * @param isBiasCorrected setting for bias correction - true means + * bias will be corrected + * @param m2 the SecondMoment (Third or Fourth moments work + * here as well.) + */ + public Variance(boolean isBiasCorrected, SecondMoment m2) { + incMoment = false; + this.moment = m2; + this.isBiasCorrected = isBiasCorrected; + } + + /** + * Copy constructor, creates a new {@code Variance} identical + * to the {@code original} + * + * @param original the {@code Variance} instance to copy + */ + public Variance(Variance original) { + copy(original, this); + } + + /** + * {@inheritDoc} + * <p>If all values are available, it is more accurate to use + * {@link #evaluate(double[])} rather than adding values one at a time + * using this method and then executing {@link #getResult}, since + * <code>evaluate</code> leverages the fact that is has the full + * list of values together to execute a two-pass algorithm. + * See {@link Variance}.</p> + */ + @Override + public void increment(final double d) { + if (incMoment) { + moment.increment(d); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + if (moment.n == 0) { + return Double.NaN; + } else if (moment.n == 1) { + return 0d; + } else { + if (isBiasCorrected) { + return moment.m2 / (moment.n - 1d); + } else { + return moment.m2 / (moment.n); + } + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return moment.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + if (incMoment) { + moment.clear(); + } + } + + /** + * Returns the variance of the entries in the input array, or + * <code>Double.NaN</code> if the array is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the array is null + */ + @Override + public double evaluate(final double[] values) { + if (values == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + return evaluate(values, 0, values.length); + } + + /** + * Returns the variance of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) { + + double var = Double.NaN; + + if (test(values, begin, length)) { + clear(); + if (length == 1) { + var = 0.0; + } else if (length > 1) { + Mean mean = new Mean(); + double m = mean.evaluate(values, begin, length); + var = evaluate(values, m, begin, length); + } + } + return var; + } + + /** + * <p>Returns the weighted variance of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty.</p> + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre> + * where weightedMean is the weighted mean</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if either array is null.</p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the weighted variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) { + + double var = Double.NaN; + + if (test(values, weights,begin, length)) { + clear(); + if (length == 1) { + var = 0.0; + } else if (length > 1) { + Mean mean = new Mean(); + double m = mean.evaluate(values, weights, begin, length); + var = evaluate(values, weights, m, begin, length); + } + } + return var; + } + + /** + * <p> + * Returns the weighted variance of the entries in the the input array.</p> + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre> + * where weightedMean is the weighted mean</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if either array is null.</p> + * + * @param values the input array + * @param weights the weights array + * @return the weighted variance of the values + * @throws IllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights) { + return evaluate(values, weights, 0, values.length); + } + + /** + * Returns the variance of the entries in the specified portion of + * the input array, using the precomputed mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * The formula used assumes that the supplied mean value is the arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + public double evaluate(final double[] values, final double mean, + final int begin, final int length) { + + double var = Double.NaN; + + if (test(values, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + double accum = 0.0; + double dev = 0.0; + double accum2 = 0.0; + for (int i = begin; i < begin + length; i++) { + dev = values[i] - mean; + accum += dev * dev; + accum2 += dev; + } + double len = length; + if (isBiasCorrected) { + var = (accum - (accum2 * accum2 / len)) / (len - 1.0); + } else { + var = (accum - (accum2 * accum2 / len)) / len; + } + } + } + return var; + } + + /** + * Returns the variance of the entries in the input array, using the + * precomputed mean value. Returns <code>Double.NaN</code> if the array + * is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * If <code>isBiasCorrected</code> is <code>true</code> the formula used + * assumes that the supplied mean value is the arithmetic mean of the + * sample data, not a known population parameter. If the mean is a known + * population parameter, or if the "population" version of the variance is + * desired, set <code>isBiasCorrected</code> to <code>false</code> before + * invoking this method.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @return the variance of the values or Double.NaN if the array is empty + * @throws IllegalArgumentException if the array is null + */ + public double evaluate(final double[] values, final double mean) { + return evaluate(values, mean, 0, values.length); + } + + /** + * Returns the weighted variance of the entries in the specified portion of + * the input array, using the precomputed weighted mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre></p> + * <p> + * The formula used assumes that the supplied mean value is the weighted arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param weights the weights array + * @param mean the precomputed weighted mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final double mean, final int begin, final int length) { + + double var = Double.NaN; + + if (test(values, weights, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + double accum = 0.0; + double dev = 0.0; + double accum2 = 0.0; + for (int i = begin; i < begin + length; i++) { + dev = values[i] - mean; + accum += weights[i] * (dev * dev); + accum2 += weights[i] * dev; + } + + double sumWts = 0; + for (int i = 0; i < weights.length; i++) { + sumWts += weights[i]; + } + + if (isBiasCorrected) { + var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); + } else { + var = (accum - (accum2 * accum2 / sumWts)) / sumWts; + } + } + } + return var; + } + + /** + * <p>Returns the weighted variance of the values in the input array, using + * the precomputed weighted mean value.</p> + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre></p> + * <p> + * The formula used assumes that the supplied mean value is the weighted arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param weights the weights array + * @param mean the precomputed weighted mean value + * @return the variance of the values or Double.NaN if length = 0 + * @throws IllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, final double mean) { + return evaluate(values, weights, mean, 0, values.length); + } + + /** + * @return Returns the isBiasCorrected. + */ + public boolean isBiasCorrected() { + return isBiasCorrected; + } + + /** + * @param biasCorrected The isBiasCorrected to set. + */ + public void setBiasCorrected(boolean biasCorrected) { + this.isBiasCorrected = biasCorrected; + } + + /** + * {@inheritDoc} + */ + @Override + public Variance copy() { + Variance result = new Variance(); + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source Variance to copy + * @param dest Variance to copy to + * @throws NullPointerException if either source or dest is null + */ + public static void copy(Variance source, Variance dest) { + if (source == null || + dest == null) { + throw new NullArgumentException(); + } + dest.setData(source.getDataRef()); + dest.moment = source.moment.copy(); + dest.isBiasCorrected = source.isBiasCorrected; + dest.incMoment = source.incMoment; + } +} |