diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java')
-rw-r--r-- | src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java new file mode 100644 index 0000000..a47d0cf --- /dev/null +++ b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.inference; + +import java.util.Collection; + +import org.apache.commons.math.MathException; +import org.apache.commons.math.MathRuntimeException; +import org.apache.commons.math.distribution.FDistribution; +import org.apache.commons.math.distribution.FDistributionImpl; +import org.apache.commons.math.exception.util.LocalizedFormats; +import org.apache.commons.math.stat.descriptive.summary.Sum; +import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; + + +/** + * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl} + * interface. + * + * <p>Uses the + * {@link org.apache.commons.math.distribution.FDistribution + * commons-math F Distribution implementation} to estimate exact p-values.</p> + * + * <p>This implementation is based on a description at + * http://faculty.vassar.edu/lowry/ch13pt1.html</p> + * <pre> + * Abbreviations: bg = between groups, + * wg = within groups, + * ss = sum squared deviations + * </pre> + * + * @since 1.2 + * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $ + */ +public class OneWayAnovaImpl implements OneWayAnova { + + /** + * Default constructor. + */ + public OneWayAnovaImpl() { + } + + /** + * {@inheritDoc}<p> + * This implementation computes the F statistic using the definitional + * formula<pre> + * F = msbg/mswg</pre> + * where<pre> + * msbg = between group mean square + * mswg = within group mean square</pre> + * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html"> + * here</a></p> + */ + public double anovaFValue(Collection<double[]> categoryData) + throws IllegalArgumentException, MathException { + AnovaStats a = anovaStats(categoryData); + return a.F; + } + + /** + * {@inheritDoc}<p> + * This implementation uses the + * {@link org.apache.commons.math.distribution.FDistribution + * commons-math F Distribution implementation} to estimate the exact + * p-value, using the formula<pre> + * p = 1 - cumulativeProbability(F)</pre> + * where <code>F</code> is the F value and <code>cumulativeProbability</code> + * is the commons-math implementation of the F distribution.</p> + */ + public double anovaPValue(Collection<double[]> categoryData) + throws IllegalArgumentException, MathException { + AnovaStats a = anovaStats(categoryData); + FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg); + return 1.0 - fdist.cumulativeProbability(a.F); + } + + /** + * {@inheritDoc}<p> + * This implementation uses the + * {@link org.apache.commons.math.distribution.FDistribution + * commons-math F Distribution implementation} to estimate the exact + * p-value, using the formula<pre> + * p = 1 - cumulativeProbability(F)</pre> + * where <code>F</code> is the F value and <code>cumulativeProbability</code> + * is the commons-math implementation of the F distribution.</p> + * <p>True is returned iff the estimated p-value is less than alpha.</p> + */ + public boolean anovaTest(Collection<double[]> categoryData, double alpha) + throws IllegalArgumentException, MathException { + if ((alpha <= 0) || (alpha > 0.5)) { + throw MathRuntimeException.createIllegalArgumentException( + LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL, + alpha, 0, 0.5); + } + return anovaPValue(categoryData) < alpha; + } + + + /** + * This method actually does the calculations (except P-value). + * + * @param categoryData <code>Collection</code> of <code>double[]</code> + * arrays each containing data for one category + * @return computed AnovaStats + * @throws IllegalArgumentException if categoryData does not meet + * preconditions specified in the interface definition + * @throws MathException if an error occurs computing the Anova stats + */ + private AnovaStats anovaStats(Collection<double[]> categoryData) + throws IllegalArgumentException, MathException { + + // check if we have enough categories + if (categoryData.size() < 2) { + throw MathRuntimeException.createIllegalArgumentException( + LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED, + categoryData.size()); + } + + // check if each category has enough data and all is double[] + for (double[] array : categoryData) { + if (array.length <= 1) { + throw MathRuntimeException.createIllegalArgumentException( + LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED, + array.length); + } + } + + int dfwg = 0; + double sswg = 0; + Sum totsum = new Sum(); + SumOfSquares totsumsq = new SumOfSquares(); + int totnum = 0; + + for (double[] data : categoryData) { + + Sum sum = new Sum(); + SumOfSquares sumsq = new SumOfSquares(); + int num = 0; + + for (int i = 0; i < data.length; i++) { + double val = data[i]; + + // within category + num++; + sum.increment(val); + sumsq.increment(val); + + // for all categories + totnum++; + totsum.increment(val); + totsumsq.increment(val); + } + dfwg += num - 1; + double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num; + sswg += ss; + } + double sst = totsumsq.getResult() - totsum.getResult() * + totsum.getResult()/totnum; + double ssbg = sst - sswg; + int dfbg = categoryData.size() - 1; + double msbg = ssbg/dfbg; + double mswg = sswg/dfwg; + double F = msbg/mswg; + + return new AnovaStats(dfbg, dfwg, F); + } + + /** + Convenience class to pass dfbg,dfwg,F values around within AnovaImpl. + No get/set methods provided. + */ + private static class AnovaStats { + + /** Degrees of freedom in numerator (between groups). */ + private int dfbg; + + /** Degrees of freedom in denominator (within groups). */ + private int dfwg; + + /** Statistic. */ + private double F; + + /** + * Constructor + * @param dfbg degrees of freedom in numerator (between groups) + * @param dfwg degrees of freedom in denominator (within groups) + * @param F statistic + */ + private AnovaStats(int dfbg, int dfwg, double F) { + this.dfbg = dfbg; + this.dfwg = dfwg; + this.F = F; + } + } + +} |