summaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java')
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java210
1 files changed, 210 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
new file mode 100644
index 0000000..a47d0cf
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import java.util.Collection;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.FDistribution;
+import org.apache.commons.math.distribution.FDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+
+
+/**
+ * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl}
+ * interface.
+ *
+ * <p>Uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate exact p-values.</p>
+ *
+ * <p>This implementation is based on a description at
+ * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
+ * <pre>
+ * Abbreviations: bg = between groups,
+ * wg = within groups,
+ * ss = sum squared deviations
+ * </pre>
+ *
+ * @since 1.2
+ * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
+ */
+public class OneWayAnovaImpl implements OneWayAnova {
+
+ /**
+ * Default constructor.
+ */
+ public OneWayAnovaImpl() {
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation computes the F statistic using the definitional
+ * formula<pre>
+ * F = msbg/mswg</pre>
+ * where<pre>
+ * msbg = between group mean square
+ * mswg = within group mean square</pre>
+ * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
+ * here</a></p>
+ */
+ public double anovaFValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ return a.F;
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and <code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ */
+ public double anovaPValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
+ return 1.0 - fdist.cumulativeProbability(a.F);
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and <code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ * <p>True is returned iff the estimated p-value is less than alpha.</p>
+ */
+ public boolean anovaTest(Collection<double[]> categoryData, double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0, 0.5);
+ }
+ return anovaPValue(categoryData) < alpha;
+ }
+
+
+ /**
+ * This method actually does the calculations (except P-value).
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return computed AnovaStats
+ * @throws IllegalArgumentException if categoryData does not meet
+ * preconditions specified in the interface definition
+ * @throws MathException if an error occurs computing the Anova stats
+ */
+ private AnovaStats anovaStats(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+
+ // check if we have enough categories
+ if (categoryData.size() < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
+ categoryData.size());
+ }
+
+ // check if each category has enough data and all is double[]
+ for (double[] array : categoryData) {
+ if (array.length <= 1) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
+ array.length);
+ }
+ }
+
+ int dfwg = 0;
+ double sswg = 0;
+ Sum totsum = new Sum();
+ SumOfSquares totsumsq = new SumOfSquares();
+ int totnum = 0;
+
+ for (double[] data : categoryData) {
+
+ Sum sum = new Sum();
+ SumOfSquares sumsq = new SumOfSquares();
+ int num = 0;
+
+ for (int i = 0; i < data.length; i++) {
+ double val = data[i];
+
+ // within category
+ num++;
+ sum.increment(val);
+ sumsq.increment(val);
+
+ // for all categories
+ totnum++;
+ totsum.increment(val);
+ totsumsq.increment(val);
+ }
+ dfwg += num - 1;
+ double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
+ sswg += ss;
+ }
+ double sst = totsumsq.getResult() - totsum.getResult() *
+ totsum.getResult()/totnum;
+ double ssbg = sst - sswg;
+ int dfbg = categoryData.size() - 1;
+ double msbg = ssbg/dfbg;
+ double mswg = sswg/dfwg;
+ double F = msbg/mswg;
+
+ return new AnovaStats(dfbg, dfwg, F);
+ }
+
+ /**
+ Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
+ No get/set methods provided.
+ */
+ private static class AnovaStats {
+
+ /** Degrees of freedom in numerator (between groups). */
+ private int dfbg;
+
+ /** Degrees of freedom in denominator (within groups). */
+ private int dfwg;
+
+ /** Statistic. */
+ private double F;
+
+ /**
+ * Constructor
+ * @param dfbg degrees of freedom in numerator (between groups)
+ * @param dfwg degrees of freedom in denominator (within groups)
+ * @param F statistic
+ */
+ private AnovaStats(int dfbg, int dfwg, double F) {
+ this.dfbg = dfbg;
+ this.dfwg = dfwg;
+ this.F = F;
+ }
+ }
+
+}