summaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/math/stat
diff options
context:
space:
mode:
authorRaymond <siuchow@google.com>2015-04-02 10:43:13 -0700
committerRaymond <siuchow@google.com>2015-04-02 10:43:13 -0700
commitdee0849a9704d532af0b550146cbafbaa6ee1d19 (patch)
tree8ccce3a046c214fb609977b7fc53c40cef7f9ea5 /src/main/java/org/apache/commons/math/stat
parent55b0a5efc929efa9615babd3e760547f94e3518e (diff)
downloadapache-commons-math-dee0849a9704d532af0b550146cbafbaa6ee1d19.tar.gz
third party library: apache-commons-mathandroid-cts-6.0_r9android-cts-6.0_r8android-cts-6.0_r7android-cts-6.0_r6android-cts-6.0_r5android-cts-6.0_r4android-cts-6.0_r32android-cts-6.0_r31android-cts-6.0_r30android-cts-6.0_r3android-cts-6.0_r29android-cts-6.0_r28android-cts-6.0_r27android-cts-6.0_r26android-cts-6.0_r25android-cts-6.0_r24android-cts-6.0_r23android-cts-6.0_r22android-cts-6.0_r21android-cts-6.0_r20android-cts-6.0_r2android-cts-6.0_r19android-cts-6.0_r18android-cts-6.0_r17android-cts-6.0_r16android-cts-6.0_r15android-cts-6.0_r14android-cts-6.0_r13android-cts-6.0_r12android-cts-6.0_r1android-6.0.1_r9android-6.0.1_r81android-6.0.1_r80android-6.0.1_r8android-6.0.1_r79android-6.0.1_r78android-6.0.1_r77android-6.0.1_r74android-6.0.1_r73android-6.0.1_r72android-6.0.1_r70android-6.0.1_r7android-6.0.1_r69android-6.0.1_r68android-6.0.1_r67android-6.0.1_r66android-6.0.1_r65android-6.0.1_r63android-6.0.1_r62android-6.0.1_r61android-6.0.1_r60android-6.0.1_r59android-6.0.1_r58android-6.0.1_r57android-6.0.1_r56android-6.0.1_r55android-6.0.1_r54android-6.0.1_r53android-6.0.1_r52android-6.0.1_r51android-6.0.1_r50android-6.0.1_r5android-6.0.1_r49android-6.0.1_r48android-6.0.1_r47android-6.0.1_r46android-6.0.1_r45android-6.0.1_r43android-6.0.1_r42android-6.0.1_r41android-6.0.1_r40android-6.0.1_r4android-6.0.1_r33android-6.0.1_r32android-6.0.1_r31android-6.0.1_r30android-6.0.1_r3android-6.0.1_r28android-6.0.1_r27android-6.0.1_r26android-6.0.1_r25android-6.0.1_r24android-6.0.1_r22android-6.0.1_r21android-6.0.1_r20android-6.0.1_r18android-6.0.1_r17android-6.0.1_r16android-6.0.1_r13android-6.0.1_r12android-6.0.1_r11android-6.0.1_r10android-6.0.1_r1android-6.0.0_r7android-6.0.0_r6android-6.0.0_r5android-6.0.0_r41android-6.0.0_r4android-6.0.0_r3android-6.0.0_r26android-6.0.0_r25android-6.0.0_r24android-6.0.0_r23android-6.0.0_r2android-6.0.0_r13android-6.0.0_r12android-6.0.0_r11android-6.0.0_r1marshmallow-releasemarshmallow-mr3-releasemarshmallow-mr2-releasemarshmallow-mr1-releas
emarshmallow-mr1-devmarshmallow-dr1.6-releasemarshmallow-dr1.5-releasemarshmallow-dr1.5-devmarshmallow-dr-releasemarshmallow-dr-dragon-releasemarshmallow-dr-devmarshmallow-devmarshmallow-cts-release
Change-Id: I52a325624a7f0dd652b362a9840626d6d9f3c42b
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat')
-rw-r--r--src/main/java/org/apache/commons/math/stat/Frequency.java603
-rw-r--r--src/main/java/org/apache/commons/math/stat/StatUtils.java663
-rw-r--r--src/main/java/org/apache/commons/math/stat/clustering/Cluster.java74
-rw-r--r--src/main/java/org/apache/commons/math/stat/clustering/Clusterable.java46
-rw-r--r--src/main/java/org/apache/commons/math/stat/clustering/EuclideanIntegerPoint.java120
-rw-r--r--src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java333
-rw-r--r--src/main/java/org/apache/commons/math/stat/clustering/package.html20
-rw-r--r--src/main/java/org/apache/commons/math/stat/correlation/Covariance.java274
-rw-r--r--src/main/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java285
-rw-r--r--src/main/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java172
-rw-r--r--src/main/java/org/apache/commons/math/stat/correlation/package.html22
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/AbstractStorelessUnivariateStatistic.java183
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java232
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java416
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/DescriptiveStatistics.java721
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/MultivariateSummaryStatistics.java637
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/StatisticalMultivariateSummary.java120
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummary.java65
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummaryValues.java186
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/StorelessUnivariateStatistic.java86
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/SummaryStatistics.java717
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedDescriptiveStatistics.java172
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java299
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedSummaryStatistics.java333
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/UnivariateStatistic.java53
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/WeightedEvaluation.java49
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/FirstMoment.java160
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/FourthMoment.java142
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/GeometricMean.java205
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/Kurtosis.java222
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java272
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/SecondMoment.java124
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/SemiVariance.java379
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/Skewness.java213
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/StandardDeviation.java271
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/ThirdMoment.java139
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java610
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialCovariance.java152
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialMean.java103
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/moment/package.html20
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/package.html41
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/rank/Max.java163
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/rank/Median.java55
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/rank/Min.java163
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/rank/Percentile.java497
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/rank/package.html20
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java224
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java220
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfLogs.java165
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfSquares.java154
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/summary/package.html20
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/ChiSquareTest.java222
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java424
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/OneWayAnova.java103
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java210
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/TTest.java771
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java1069
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/TestUtils.java436
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java144
-rw-r--r--src/main/java/org/apache/commons/math/stat/inference/package.html23
-rw-r--r--src/main/java/org/apache/commons/math/stat/package.html20
-rw-r--r--src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java49
-rw-r--r--src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java464
-rw-r--r--src/main/java/org/apache/commons/math/stat/ranking/RankingAlgorithm.java41
-rw-r--r--src/main/java/org/apache/commons/math/stat/ranking/TiesStrategy.java55
-rw-r--r--src/main/java/org/apache/commons/math/stat/ranking/package.html22
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java366
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java136
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java70
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java233
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/SimpleRegression.java639
-rw-r--r--src/main/java/org/apache/commons/math/stat/regression/package.html22
72 files changed, 17134 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/Frequency.java b/src/main/java/org/apache/commons/math/stat/Frequency.java
new file mode 100644
index 0000000..434819e
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/Frequency.java
@@ -0,0 +1,603 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat;
+
+import java.io.Serializable;
+import java.text.NumberFormat;
+import java.util.Iterator;
+import java.util.Comparator;
+import java.util.TreeMap;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+
+/**
+ * Maintains a frequency distribution.
+ * <p>
+ * Accepts int, long, char or Comparable values. New values added must be
+ * comparable to those that have been added, otherwise the add method will
+ * throw an IllegalArgumentException.</p>
+ * <p>
+ * Integer values (int, long, Integer, Long) are not distinguished by type --
+ * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
+ * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
+ * <p>
+ * char values are converted by <code>addValue</code> to Character instances.
+ * As such, these values are not comparable to integral values, so attempts
+ * to combine integral types with chars in a frequency distribution will fail.
+ * </p>
+ * <p>
+ * The values are ordered using the default (natural order), unless a
+ * <code>Comparator</code> is supplied in the constructor.</p>
+ *
+ * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
+ */
+public class Frequency implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -3845586908418844111L;
+
+ /** underlying collection */
+ private final TreeMap<Comparable<?>, Long> freqTable;
+
+ /**
+ * Default constructor.
+ */
+ public Frequency() {
+ freqTable = new TreeMap<Comparable<?>, Long>();
+ }
+
+ /**
+ * Constructor allowing values Comparator to be specified.
+ *
+ * @param comparator Comparator used to order values
+ */
+ @SuppressWarnings("unchecked") // TODO is the cast OK?
+ public Frequency(Comparator<?> comparator) {
+ freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
+ }
+
+ /**
+ * Return a string representation of this frequency
+ * distribution.
+ *
+ * @return a string representation.
+ */
+ @Override
+ public String toString() {
+ NumberFormat nf = NumberFormat.getPercentInstance();
+ StringBuilder outBuffer = new StringBuilder();
+ outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
+ Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
+ while (iter.hasNext()) {
+ Comparable<?> value = iter.next();
+ outBuffer.append(value);
+ outBuffer.append('\t');
+ outBuffer.append(getCount(value));
+ outBuffer.append('\t');
+ outBuffer.append(nf.format(getPct(value)));
+ outBuffer.append('\t');
+ outBuffer.append(nf.format(getCumPct(value)));
+ outBuffer.append('\n');
+ }
+ return outBuffer.toString();
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ * <p>
+ * If other objects have already been added to this Frequency, v must
+ * be comparable to those that have already been added.
+ * </p>
+ *
+ * @param v the value to add.
+ * @throws IllegalArgumentException if <code>v</code> is not Comparable,
+ * or is not comparable with previous entries
+ * @deprecated use {@link #addValue(Comparable)} instead
+ */
+ @Deprecated
+ public void addValue(Object v) {
+ if (v instanceof Comparable<?>){
+ addValue((Comparable<?>) v);
+ } else {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.CLASS_DOESNT_IMPLEMENT_COMPARABLE,
+ v.getClass().getName());
+ }
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ * <p>
+ * If other objects have already been added to this Frequency, v must
+ * be comparable to those that have already been added.
+ * </p>
+ *
+ * @param v the value to add.
+ * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
+ */
+ public void addValue(Comparable<?> v){
+ Comparable<?> obj = v;
+ if (v instanceof Integer) {
+ obj = Long.valueOf(((Integer) v).longValue());
+ }
+ try {
+ Long count = freqTable.get(obj);
+ if (count == null) {
+ freqTable.put(obj, Long.valueOf(1));
+ } else {
+ freqTable.put(obj, Long.valueOf(count.longValue() + 1));
+ }
+ } catch (ClassCastException ex) {
+ //TreeMap will throw ClassCastException if v is not comparable
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
+ v.getClass().getName());
+ }
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ *
+ * @param v the value to add.
+ */
+ public void addValue(int v) {
+ addValue(Long.valueOf(v));
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ *
+ * @param v the value to add.
+ * @deprecated to be removed in math 3.0
+ */
+ @Deprecated
+ public void addValue(Integer v) {
+ addValue(Long.valueOf(v.longValue()));
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ *
+ * @param v the value to add.
+ */
+ public void addValue(long v) {
+ addValue(Long.valueOf(v));
+ }
+
+ /**
+ * Adds 1 to the frequency count for v.
+ *
+ * @param v the value to add.
+ */
+ public void addValue(char v) {
+ addValue(Character.valueOf(v));
+ }
+
+ /** Clears the frequency table */
+ public void clear() {
+ freqTable.clear();
+ }
+
+ /**
+ * Returns an Iterator over the set of values that have been added.
+ * <p>
+ * If added values are integral (i.e., integers, longs, Integers, or Longs),
+ * they are converted to Longs when they are added, so the objects returned
+ * by the Iterator will in this case be Longs.</p>
+ *
+ * @return values Iterator
+ */
+ public Iterator<Comparable<?>> valuesIterator() {
+ return freqTable.keySet().iterator();
+ }
+
+ //-------------------------------------------------------------------------
+
+ /**
+ * Returns the sum of all frequencies.
+ *
+ * @return the total frequency count.
+ */
+ public long getSumFreq() {
+ long result = 0;
+ Iterator<Long> iterator = freqTable.values().iterator();
+ while (iterator.hasNext()) {
+ result += iterator.next().longValue();
+ }
+ return result;
+ }
+
+ /**
+ * Returns the number of values = v.
+ * Returns 0 if v does not implement Comparable or is not comparable to the stored values.
+ *
+ * @param v the value to lookup; a null reference is handed to {@link #getCount(Comparable)} unchanged.
+ * @return the frequency of v.
+ * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0
+ */
+ @Deprecated
+ public long getCount(Object v) {
+ return (v == null || v instanceof Comparable<?>) ? getCount((Comparable<?>) v) : 0; // guard: a blind cast would throw ClassCastException
+ }
+
+ /**
+ * Returns the number of values = v.
+ * Returns 0 if the value is not comparable.
+ *
+ * @param v the value to lookup.
+ * @return the frequency of v.
+ */
+ public long getCount(Comparable<?> v) {
+ if (v instanceof Integer) {
+ return getCount(((Integer) v).longValue());
+ }
+ long result = 0;
+ try {
+ Long count = freqTable.get(v);
+ if (count != null) {
+ result = count.longValue();
+ }
+ } catch (ClassCastException ex) {
+ // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
+ }
+ return result;
+ }
+
+ /**
+ * Returns the number of values = v.
+ *
+ * @param v the value to lookup.
+ * @return the frequency of v.
+ */
+ public long getCount(int v) {
+ return getCount(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the number of values = v.
+ *
+ * @param v the value to lookup.
+ * @return the frequency of v.
+ */
+ public long getCount(long v) {
+ return getCount(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the number of values = v.
+ *
+ * @param v the value to lookup.
+ * @return the frequency of v.
+ */
+ public long getCount(char v) {
+ return getCount(Character.valueOf(v));
+ }
+
+ /**
+ * Returns the number of values in the frequency table.
+ *
+ * @return the number of unique values that have been added to the frequency table.
+ * @see #valuesIterator()
+ */
+ public int getUniqueCount(){
+ return freqTable.keySet().size();
+ }
+
+ //-------------------------------------------------------------
+
+ /**
+ * Returns the percentage of values that are equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns <code>Double.NaN</code> if no values have been added, otherwise 0 if v does not implement Comparable.</p>
+ *
+ * @param v the value to lookup; a null reference is handed to {@link #getPct(Comparable)} unchanged
+ * @return the proportion of values equal to v
+ * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0
+ */
+ @Deprecated
+ public double getPct(Object v) {
+ return (v == null || v instanceof Comparable<?>) ? getPct((Comparable<?>) v) : (getSumFreq() == 0 ? Double.NaN : 0d); // guard: a blind cast would throw ClassCastException
+ }
+
+ /**
+ * Returns the percentage of values that are equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns <code>Double.NaN</code> if no values have been added.</p>
+ *
+ * @param v the value to lookup
+ * @return the proportion of values equal to v
+ */
+ public double getPct(Comparable<?> v) {
+ final long sumFreq = getSumFreq();
+ if (sumFreq == 0) {
+ return Double.NaN;
+ }
+ return (double) getCount(v) / (double) sumFreq;
+ }
+
+ /**
+ * Returns the percentage of values that are equal to v
+ * (as a proportion between 0 and 1).
+ *
+ * @param v the value to lookup
+ * @return the proportion of values equal to v
+ */
+ public double getPct(int v) {
+ return getPct(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the percentage of values that are equal to v
+ * (as a proportion between 0 and 1).
+ *
+ * @param v the value to lookup
+ * @return the proportion of values equal to v
+ */
+ public double getPct(long v) {
+ return getPct(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the percentage of values that are equal to v
+ * (as a proportion between 0 and 1).
+ *
+ * @param v the value to lookup
+ * @return the proportion of values equal to v
+ */
+ public double getPct(char v) {
+ return getPct(Character.valueOf(v));
+ }
+
+ //-----------------------------------------------------------------------------------------
+
+ /**
+ * Returns the cumulative frequency of values less than or equal to v.
+ * <p>
+ * Returns 0 if v does not implement Comparable or is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup; a null reference is handed to {@link #getCumFreq(Comparable)} unchanged.
+ * @return the cumulative frequency of values less than or equal to v
+ * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0
+ */
+ @Deprecated
+ public long getCumFreq(Object v) {
+ return (v == null || v instanceof Comparable<?>) ? getCumFreq((Comparable<?>) v) : 0; // guard: a blind cast would throw ClassCastException
+ }
+
+ /**
+ * Returns the cumulative frequency of values less than or equal to v.
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup.
+ * @return the cumulative frequency of values less than or equal to v
+ */
+ public long getCumFreq(Comparable<?> v) {
+ if (getSumFreq() == 0) {
+ return 0;
+ }
+ if (v instanceof Integer) { // Integers are stored as Longs by addValue, so look them up as a long
+ return getCumFreq(((Integer) v).longValue());
+ }
+ @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
+ Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
+ if (c == null) { // no comparator was supplied: compare using the keys' own compareTo
+ c = new NaturalComparator();
+ }
+ long result = 0; // seeded below with the count of v itself, if v is a key
+
+ try {
+ Long value = freqTable.get(v);
+ if (value != null) {
+ result = value.longValue();
+ }
+ } catch (ClassCastException ex) {
+ return result; // v is not comparable to the keys; result is still 0 here
+ }
+
+ if (c.compare(v, freqTable.firstKey()) < 0) {
+ return 0; // v is comparable, but less than first value
+ }
+
+ if (c.compare(v, freqTable.lastKey()) >= 0) {
+ return getSumFreq(); // v is comparable, and greater than or equal to the last value
+ }
+
+ Iterator<Comparable<?>> values = valuesIterator();
+ while (values.hasNext()) {
+ Comparable<?> nextValue = values.next();
+ if (c.compare(v, nextValue) > 0) { // nextValue sorts strictly below v: add its count
+ result += getCount(nextValue);
+ } else {
+ return result; // first key not below v reached; v's own count was added up front
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Returns the cumulative frequency of values less than or equal to v.
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the cumulative frequency of values less than or equal to v
+ */
+ public long getCumFreq(int v) {
+ return getCumFreq(Long.valueOf(v)); // boxed as Long, the type addValue(int) stores
+ }
+
+ /**
+ * Returns the cumulative frequency of values less than or equal to v.
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the cumulative frequency of values less than or equal to v
+ */
+ public long getCumFreq(long v) {
+ return getCumFreq(Long.valueOf(v)); // boxed as Long, the type addValue(long) stores
+ }
+
+ /**
+ * Returns the cumulative frequency of values less than or equal to v.
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the cumulative frequency of values less than or equal to v
+ */
+ public long getCumFreq(char v) {
+ return getCumFreq(Character.valueOf(v)); // boxed as Character, the type addValue(char) stores
+ }
+
+ //----------------------------------------------------------------------------------------------
+
+ /**
+ * Returns the cumulative percentage of values less than or equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns <code>Double.NaN</code> if no values have been added.
+ * Returns 0 if at least one value has been added, but v does not implement
+ * Comparable or is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup; a null reference is handed to {@link #getCumPct(Comparable)} unchanged
+ * @return the proportion of values less than or equal to v
+ * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0
+ */
+ @Deprecated
+ public double getCumPct(Object v) {
+ final boolean castable = v == null || v instanceof Comparable<?>; // guard: a blind cast would throw ClassCastException
+ return castable ? getCumPct((Comparable<?>) v) : (getSumFreq() == 0 ? Double.NaN : 0d);
+ }
+
+ /**
+ * Returns the cumulative percentage of values less than or equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns <code>Double.NaN</code> if no values have been added.
+ * Returns 0 if at least one value has been added, but v is not comparable
+ * to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the proportion of values less than or equal to v
+ */
+ public double getCumPct(Comparable<?> v) {
+ final long sumFreq = getSumFreq();
+ if (sumFreq == 0) {
+ return Double.NaN;
+ }
+ return (double) getCumFreq(v) / (double) sumFreq;
+ }
+
+ /**
+ * Returns the cumulative percentage of values less than or equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the proportion of values less than or equal to v
+ */
+ public double getCumPct(int v) {
+ return getCumPct(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the cumulative percentage of values less than or equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the proportion of values less than or equal to v
+ */
+ public double getCumPct(long v) {
+ return getCumPct(Long.valueOf(v));
+ }
+
+ /**
+ * Returns the cumulative percentage of values less than or equal to v
+ * (as a proportion between 0 and 1).
+ * <p>
+ * Returns 0 if v is not comparable to the values set.</p>
+ *
+ * @param v the value to lookup
+ * @return the proportion of values less than or equal to v
+ */
+ public double getCumPct(char v) {
+ return getCumPct(Character.valueOf(v));
+ }
+
+ /**
+ * A Comparator that compares comparable objects using the
+ * natural order. Copied from Commons Collections ComparableComparator.
+ */
+ private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -3852193713161395148L;
+
+ /**
+ * Compare the two {@link Comparable Comparable} arguments.
+ * This method is equivalent to:
+ * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
+ *
+ * @param o1 the first object
+ * @param o2 the second object
+ * @return result of comparison
+ * @throws NullPointerException when <i>o1</i> is <code>null</code>,
+ * or when <code>((Comparable)o1).compareTo(o2)</code> does
+ * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
+ * or when <code>((Comparable)o1).compareTo(o2)</code> does
+ */
+ @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
+ public int compare(Comparable<T> o1, Comparable<T> o2) {
+ return o1.compareTo((T) o2);
+ }
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result +
+ ((freqTable == null) ? 0 : freqTable.hashCode());
+ return result;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!(obj instanceof Frequency))
+ return false;
+ Frequency other = (Frequency) obj;
+ if (freqTable == null) {
+ if (other.freqTable != null)
+ return false;
+ } else if (!freqTable.equals(other.freqTable))
+ return false;
+ return true;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/StatUtils.java b/src/main/java/org/apache/commons/math/stat/StatUtils.java
new file mode 100644
index 0000000..7ae1e17
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/StatUtils.java
@@ -0,0 +1,663 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+import org.apache.commons.math.stat.descriptive.rank.Max;
+import org.apache.commons.math.stat.descriptive.rank.Min;
+import org.apache.commons.math.stat.descriptive.rank.Percentile;
+import org.apache.commons.math.stat.descriptive.summary.Product;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+
+/**
+ * StatUtils provides static methods for computing statistics based on data
+ * stored in double[] arrays.
+ *
+ * @version $Revision: 1073276 $ $Date: 2011-02-22 10:34:52 +0100 (mar. 22 févr. 2011) $
+ */
+public final class StatUtils {
+
+ /** Shared {@link Sum} instance that the <code>sum</code> methods delegate to. */
+ private static final UnivariateStatistic SUM = new Sum();
+
+ /** Shared {@link SumOfSquares} instance that the <code>sumSq</code> methods delegate to. */
+ private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
+
+ /** Shared {@link Product} instance that the <code>product</code> methods delegate to. */
+ private static final UnivariateStatistic PRODUCT = new Product();
+
+ /** Shared {@link SumOfLogs} instance that the <code>sumLog</code> methods delegate to. */
+ private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
+
+ /** Shared {@link Min} instance that the <code>min</code> methods delegate to. */
+ private static final UnivariateStatistic MIN = new Min();
+
+ /** Shared {@link Max} instance that the <code>max</code> methods delegate to. */
+ private static final UnivariateStatistic MAX = new Max();
+
+ /** Shared {@link Mean} instance that the <code>mean</code> methods delegate to. */
+ private static final UnivariateStatistic MEAN = new Mean();
+
+ /**
+ * Shared {@link Variance} instance. Declared with its concrete type: the
+ * <code>variance</code> methods also call its evaluate overloads that take
+ * a precomputed mean.
+ */
+ private static final Variance VARIANCE = new Variance();
+
+ /**
+ * Shared {@link Percentile} instance. Declared with its concrete type: the
+ * <code>percentile</code> methods call its evaluate overloads that take the
+ * percentile <code>p</code>.
+ */
+ private static final Percentile PERCENTILE = new Percentile();
+
+ /** Shared {@link GeometricMean} instance that the <code>geometricMean</code> methods delegate to. */
+ private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
+
+ /**
+ * Private constructor: this final class only exposes static methods and
+ * is not meant to be instantiated.
+ */
+ private StatUtils() {
+ }
+
+ /**
+ * Returns the sum of the values in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the input array
+ * is null.</p>
+ *
+ * @param values array of values to sum
+ * @return the sum of the values or <code>Double.NaN</code> if the array
+ * is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double sum(final double[] values) {
+     // Delegates to the shared Sum statistic instance.
+     final double total = SUM.evaluate(values);
+     return total;
+ }
+
+ /**
+ * Returns the sum of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the sum of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double sum(final double[] values, final int begin,
+ final int length) {
+ return SUM.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the sum of the squares of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values input array
+ * @return the sum of the squared values or <code>Double.NaN</code> if the
+ * array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double sumSq(final double[] values) {
+     // Delegates to the shared SumOfSquares statistic instance.
+     final double sumOfSquares = SUM_OF_SQUARES.evaluate(values);
+     return sumOfSquares;
+ }
+
+ /**
+ * Returns the sum of the squares of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the sum of the squares of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double sumSq(final double[] values, final int begin,
+ final int length) {
+ return SUM_OF_SQUARES.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the product of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @return the product of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double product(final double[] values) {
+     // Delegates to the shared Product statistic instance.
+     final double productOfValues = PRODUCT.evaluate(values);
+     return productOfValues;
+ }
+
+ /**
+ * Returns the product of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the product of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double product(final double[] values, final int begin,
+ final int length) {
+ return PRODUCT.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the sum of the natural logs of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
+ * </p>
+ *
+ * @param values the input array
+ * @return the sum of the natural logs of the values or Double.NaN if
+ * the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double sumLog(final double[] values) {
+     // Delegates to the shared SumOfLogs statistic instance.
+     final double sumOfLogs = SUM_OF_LOGS.evaluate(values);
+     return sumOfLogs;
+ }
+
+ /**
+ * Returns the sum of the natural logs of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
+ * </p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the sum of the natural logs of the values or Double.NaN if
+ * length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double sumLog(final double[] values, final int begin,
+ final int length) {
+ return SUM_OF_LOGS.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the arithmetic mean of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
+ * details on the computing algorithm.</p>
+ *
+ * @param values the input array
+ * @return the mean of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double mean(final double[] values) {
+     // Delegates to the shared Mean statistic instance.
+     final double average = MEAN.evaluate(values);
+     return average;
+ }
+
+ /**
+ * Returns the arithmetic mean of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
+ * details on the computing algorithm.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the mean of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double mean(final double[] values, final int begin,
+ final int length) {
+ return MEAN.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the geometric mean of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
+ * for details on the computing algorithm.</p>
+ *
+ * @param values the input array
+ * @return the geometric mean of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double geometricMean(final double[] values) {
+     // Delegates to the shared GeometricMean statistic instance.
+     final double geometricAverage = GEOMETRIC_MEAN.evaluate(values);
+     return geometricAverage;
+ }
+
+ /**
+ * Returns the geometric mean of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
+ * for details on the computing algorithm.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the geometric mean of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double geometricMean(final double[] values, final int begin,
+ final int length) {
+ return GEOMETRIC_MEAN.evaluate(values, begin, length);
+ }
+
+
+ /**
+ * Returns the variance of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
+ * details on the computing algorithm.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @return the variance of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double variance(final double[] values) {
+     // Delegates to the shared Variance statistic instance.
+     final double spread = VARIANCE.evaluate(values);
+     return spread;
+ }
+
+ /**
+ * Returns the variance of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
+ * details on the computing algorithm.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null or the
+ * array index parameters are not valid.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double variance(final double[] values, final int begin,
+ final int length) {
+ return VARIANCE.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the variance of the entries in the specified portion of
+ * the input array, using the precomputed mean value. Returns
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
+ * details on the computing algorithm.</p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null or the
+ * array index parameters are not valid.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double variance(final double[] values, final double mean,
+     final int begin, final int length) {
+     // Delegates to the shared Variance instance, passing the caller's precomputed mean through.
+     final double spread = VARIANCE.evaluate(values, mean, begin, length);
+     return spread;
+ }
+
+ /**
+ * Returns the variance of the entries in the input array, using the
+ * precomputed mean value. Returns <code>Double.NaN</code> if the array
+ * is empty.
+ * <p>
+ * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
+ * details on the computing algorithm.</p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @return the variance of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double variance(final double[] values, final double mean) {
+     // Delegates to the shared Variance instance, passing the caller's precomputed mean through.
+     final double spread = VARIANCE.evaluate(values, mean);
+     return spread;
+ }
+
+ /**
+ * Returns the maximum of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
+ * the result is <code>Double.POSITIVE_INFINITY.</code></li>
+ * </ul></p>
+ *
+ * @param values the input array
+ * @return the maximum of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double max(final double[] values) {
+     // Delegates to the shared Max statistic instance.
+     final double largest = MAX.evaluate(values);
+     return largest;
+ }
+
+ /**
+ * Returns the maximum of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null or
+ * the array index parameters are not valid.</p>
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
+ * the result is <code>Double.POSITIVE_INFINITY.</code></li>
+ * </ul></p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the maximum of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double max(final double[] values, final int begin,
+ final int length) {
+ return MAX.evaluate(values, begin, length);
+ }
+
+ /**
+ * Returns the minimum of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
+ * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
+ * </ul> </p>
+ *
+ * @param values the input array
+ * @return the minimum of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public static double min(final double[] values) {
+     // Delegates to the shared Min statistic instance.
+     final double smallest = MIN.evaluate(values);
+     return smallest;
+ }
+
+ /**
+ * Returns the minimum of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null or
+ * the array index parameters are not valid.</p>
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
+ * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
+ * </ul></p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the minimum of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public static double min(final double[] values, final int begin,
+ final int length) {
+ return MIN.evaluate(values, begin, length);
+ }
+
+ /**
+  * Returns an estimate of the <code>p</code>th percentile of the values
+  * in the <code>values</code> array.
+  * <ul>
+  * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
+  * <code>0</code></li>
+  * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
+  * if <code>values</code> has length <code>1</code></li>
+  * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
+  * is null or p is not a valid quantile value (p must be greater than 0
+  * and less than or equal to 100)</li>
+  * </ul>
+  * <p>
+  * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
+  * a description of the percentile estimation algorithm used.</p>
+  *
+  * @param values input array of values
+  * @param p the percentile value to compute
+  * @return the percentile value or Double.NaN if the array is empty
+  * @throws IllegalArgumentException if <code>values</code> is null
+  * or p is invalid
+  */
+ public static double percentile(final double[] values, final double p) {
+     // Delegates to the shared Percentile statistic instance.
+     final double estimate = PERCENTILE.evaluate(values, p);
+     return estimate;
+ }
+
+ /**
+  * Returns an estimate of the <code>p</code>th percentile of the values
+  * in the <code>values</code> array, starting with the element in (0-based)
+  * position <code>begin</code> in the array and including <code>length</code>
+  * values.
+  * <ul>
+  * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
+  * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
+  * if <code>length = 1</code></li>
+  * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
+  * is null, <code>begin</code> or <code>length</code> is invalid, or
+  * <code>p</code> is not a valid quantile value (p must be greater than 0
+  * and less than or equal to 100)</li>
+  * </ul>
+  * <p>
+  * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
+  * a description of the percentile estimation algorithm used.</p>
+  *
+  * @param values array of input values
+  * @param begin the first (0-based) element to include in the computation
+  * @param length the number of array elements to include
+  * @param p the percentile to compute
+  * @return the percentile value
+  * @throws IllegalArgumentException if the parameters are not valid or the
+  * input array is null
+  */
+ public static double percentile(final double[] values, final int begin,
+     final int length, final double p) {
+     // Delegates to the shared Percentile statistic instance, restricted to the sub-range.
+     final double estimate = PERCENTILE.evaluate(values, begin, length, p);
+     return estimate;
+ }
+
+ /**
+  * Returns the sum of the (signed) differences between corresponding elements of the
+  * input arrays -- i.e., sum(sample1[i] - sample2[i]).
+  *
+  * @param sample1 the first array
+  * @param sample2 the second array
+  * @return sum of paired differences
+  * @throws IllegalArgumentException if the arrays do not have the same
+  * (positive) length
+  */
+ public static double sumDifference(final double[] sample1, final double[] sample2)
+     throws IllegalArgumentException {
+     final int n = sample1.length;
+     final int otherLength = sample2.length;
+     // Both arrays must have the same length ...
+     if (n != otherLength) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, n, otherLength);
+     }
+     // ... and contain at least one pair.
+     if (n < 1) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.INSUFFICIENT_DIMENSION, otherLength, 1);
+     }
+     double total = 0;
+     for (int index = 0; index < n; index++) {
+         final double delta = sample1[index] - sample2[index];
+         total += delta;
+     }
+     return total;
+ }
+
+ /**
+  * Returns the mean of the (signed) differences between corresponding elements of the
+  * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
+  *
+  * @param sample1 the first array
+  * @param sample2 the second array
+  * @return mean of paired differences
+  * @throws IllegalArgumentException if the arrays do not have the same
+  * (positive) length
+  */
+ public static double meanDifference(final double[] sample1, final double[] sample2)
+     throws IllegalArgumentException {
+     // sumDifference performs the length validation before anything is divided.
+     final double total = sumDifference(sample1, sample2);
+     return total / sample1.length;
+ }
+
+ /**
+  * Returns the variance of the (signed) differences between corresponding elements of the
+  * input arrays -- i.e., var(sample1[i] - sample2[i]).
+  * <p>
+  * The result is computed as
+  * <code>(sum(dev^2) - sum(dev)^2 / n) / (n - 1)</code>, where
+  * <code>dev = (sample1[i] - sample2[i]) - meanDifference</code>. The
+  * second term vanishes when the deviations sum to zero.</p>
+  *
+  * @param sample1 the first array
+  * @param sample2 the second array
+  * @param meanDifference the mean difference between corresponding entries
+  * @see #meanDifference(double[],double[])
+  * @return variance of paired differences
+  * @throws IllegalArgumentException if the arrays do not have the same
+  * length or their common length is less than 2.
+  */
+ public static double varianceDifference(final double[] sample1, final double[] sample2,
+     double meanDifference) throws IllegalArgumentException {
+     final int n = sample1.length;
+     if (n != sample2.length) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, n, sample2.length);
+     }
+     if (n < 2) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.INSUFFICIENT_DIMENSION, n, 2);
+     }
+     double sumOfSquaredDeviations = 0d;
+     double sumOfDeviations = 0d;
+     for (int index = 0; index < n; index++) {
+         final double deviation = (sample1[index] - sample2[index]) - meanDifference;
+         sumOfSquaredDeviations += deviation * deviation;
+         sumOfDeviations += deviation;
+     }
+     final double correction = sumOfDeviations * sumOfDeviations / n;
+     return (sumOfSquaredDeviations - correction) / (n - 1);
+ }
+
+
+ /**
+  * Normalizes (standardizes) the series so that the result has a mean of 0
+  * and a standard deviation of 1.
+  * <p>
+  * Each entry is computed as <code>(x - mean) / standardDeviation</code>,
+  * with mean and standard deviation taken from a
+  * {@link DescriptiveStatistics} instance filled with the sample. If the
+  * computed standard deviation is zero, the division produces non-finite
+  * entries.</p>
+  *
+  * @param sample sample to normalize
+  * @return normalized (standardized) sample
+  * @since 2.2
+  */
+ public static double[] normalize(final double[] sample) {
+     // Feed the whole series into a statistics accumulator, in order.
+     final DescriptiveStatistics stats = new DescriptiveStatistics();
+     for (final double value : sample) {
+         stats.addValue(value);
+     }
+
+     final double mean = stats.getMean();
+     final double standardDeviation = stats.getStandardDeviation();
+
+     // The result has the same length as the input; z = (x - mean) / standardDeviation.
+     final int size = sample.length;
+     final double[] standardizedSample = new double[size];
+     for (int index = 0; index < size; index++) {
+         final double centered = sample[index] - mean;
+         standardizedSample[index] = centered / standardDeviation;
+     }
+     return standardizedSample;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/clustering/Cluster.java b/src/main/java/org/apache/commons/math/stat/clustering/Cluster.java
new file mode 100644
index 0000000..f4913d3
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/clustering/Cluster.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.clustering;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Cluster holding a set of {@link Clusterable} points.
+ * @param <T> the type of points that can be clustered
+ * @version $Revision: 771076 $ $Date: 2009-05-03 18:28:48 +0200 (dim. 03 mai 2009) $
+ * @since 2.0
+ */
+public class Cluster<T extends Clusterable<T>> implements Serializable {
+
+ /** Serializable version identifier. */
+ private static final long serialVersionUID = -3442297081515880464L;
+
+ /** The points contained in this cluster. */
+ private final List<T> points;
+
+ /** Center of the cluster. */
+ private final T center;
+
+ /**
+ * Build a cluster centered at a specified point.
+ * @param center the point which is to be the center of this cluster
+ */
+ public Cluster(final T center) {
+ this.center = center;
+ points = new ArrayList<T>();
+ }
+
+ /**
+ * Add a point to this cluster.
+ * @param point point to add
+ */
+ public void addPoint(final T point) {
+ points.add(point);
+ }
+
+ /**
+ * Get the points contained in the cluster.
+ * @return points contained in the cluster
+ */
+ public List<T> getPoints() {
+ return points;
+ }
+
+ /**
+ * Get the point chosen to be the center of this cluster.
+ * @return chosen cluster center
+ */
+ public T getCenter() {
+ return center;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/clustering/Clusterable.java b/src/main/java/org/apache/commons/math/stat/clustering/Clusterable.java
new file mode 100644
index 0000000..65132e6
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/clustering/Clusterable.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.clustering;
+
+import java.util.Collection;
+
+/**
+ * Interface for points that can be clustered together.
+ * @param <T> the type of point that can be clustered
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ * @since 2.0
+ */
+public interface Clusterable<T> {
+
+ /**
+ * Returns the distance from the given point.
+ *
+ * @param p the point to compute the distance from
+ * @return the distance from the given point
+ */
+ double distanceFrom(T p);
+
+ /**
+ * Returns the centroid of the given Collection of points.
+ *
+ * @param p the Collection of points to compute the centroid of
+ * @return the centroid of the given Collection of Points
+ */
+ T centroidOf(Collection<T> p);
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/clustering/EuclideanIntegerPoint.java b/src/main/java/org/apache/commons/math/stat/clustering/EuclideanIntegerPoint.java
new file mode 100644
index 0000000..7fec0ff
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/clustering/EuclideanIntegerPoint.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.clustering;
+
+import java.io.Serializable;
+import java.util.Collection;
+
+import org.apache.commons.math.util.MathUtils;
+
+/**
+ * A simple implementation of {@link Clusterable} for points with integer coordinates.
+ * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 déc. 2010) $
+ * @since 2.0
+ */
+public class EuclideanIntegerPoint implements Clusterable<EuclideanIntegerPoint>, Serializable {
+
+ /** Serializable version identifier. */
+ private static final long serialVersionUID = 3946024775784901369L;
+
+ /** Point coordinates. */
+ private final int[] point;
+
+ /**
+ * Build an instance wrapping an integer array.
+ * <p>The wrapped array is referenced, it is <em>not</em> copied.</p>
+ * @param point the n-dimensional point in integer space
+ */
+ public EuclideanIntegerPoint(final int[] point) {
+ this.point = point;
+ }
+
+ /**
+ * Get the n-dimensional point in integer space.
+ * @return a reference (not a copy!) to the wrapped array
+ */
+ public int[] getPoint() {
+ return point;
+ }
+
+ /** {@inheritDoc} */
+ public double distanceFrom(final EuclideanIntegerPoint p) {
+ return MathUtils.distance(point, p.getPoint());
+ }
+
+ /** {@inheritDoc} */
+ public EuclideanIntegerPoint centroidOf(final Collection<EuclideanIntegerPoint> points) {
+ int[] centroid = new int[getPoint().length];
+ for (EuclideanIntegerPoint p : points) {
+ for (int i = 0; i < centroid.length; i++) {
+ centroid[i] += p.getPoint()[i];
+ }
+ }
+ for (int i = 0; i < centroid.length; i++) {
+ centroid[i] /= points.size();
+ }
+ return new EuclideanIntegerPoint(centroid);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(final Object other) {
+ if (!(other instanceof EuclideanIntegerPoint)) {
+ return false;
+ }
+ final int[] otherPoint = ((EuclideanIntegerPoint) other).getPoint();
+ if (point.length != otherPoint.length) {
+ return false;
+ }
+ for (int i = 0; i < point.length; i++) {
+ if (point[i] != otherPoint[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ int hashCode = 0;
+ for (Integer i : point) {
+ hashCode += i.hashCode() * 13 + 7;
+ }
+ return hashCode;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @since 2.1
+ */
+ @Override
+ public String toString() {
+ final StringBuilder buff = new StringBuilder("(");
+ final int[] coordinates = getPoint();
+ for (int i = 0; i < coordinates.length; i++) {
+ buff.append(coordinates[i]);
+ if (i < coordinates.length - 1) {
+ buff.append(",");
+ }
+ }
+ buff.append(")");
+ return buff.toString();
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java b/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
new file mode 100644
index 0000000..eb61866
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/clustering/KMeansPlusPlusClusterer.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.clustering;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.math.exception.ConvergenceException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+
+/**
+ * Clustering algorithm based on the k-means++ algorithm of David Arthur and Sergei Vassilvitskii.
+ * @param <T> type of the points to cluster
+ * @see <a href="http://en.wikipedia.org/wiki/K-means%2B%2B">K-means++ (wikipedia)</a>
+ * @version $Revision: 1054333 $ $Date: 2011-01-02 01:34:58 +0100 (dim. 02 janv. 2011) $
+ * @since 2.0
+ */
+public class KMeansPlusPlusClusterer<T extends Clusterable<T>> {
+
+ /** Strategies to use for replacing an empty cluster. */
+ public static enum EmptyClusterStrategy {
+
+ /** Split the cluster with largest distance variance. */
+ LARGEST_VARIANCE,
+
+ /** Split the cluster with largest number of points. */
+ LARGEST_POINTS_NUMBER,
+
+ /** Create a cluster around the point farthest from its centroid. */
+ FARTHEST_POINT,
+
+ /** Generate an error. */
+ ERROR
+
+ }
+
+ /** Random generator for choosing initial centers. */
+ private final Random random;
+
+ /** Selected strategy for empty clusters. */
+ private final EmptyClusterStrategy emptyStrategy;
+
+ /** Build a clusterer.
+ * <p>
+ * The default strategy for handling empty clusters that may appear during
+ * algorithm iterations is to split the cluster with largest distance variance.
+ * </p>
+ * @param random random generator to use for choosing initial centers
+ */
+ public KMeansPlusPlusClusterer(final Random random) {
+ this(random, EmptyClusterStrategy.LARGEST_VARIANCE);
+ }
+
+ /** Build a clusterer.
+ * @param random random generator to use for choosing initial centers
+ * @param emptyStrategy strategy to use for handling empty clusters that
+ * may appear during algorithm iterations
+ * @since 2.2
+ */
+ public KMeansPlusPlusClusterer(final Random random, final EmptyClusterStrategy emptyStrategy) {
+ this.random = random;
+ this.emptyStrategy = emptyStrategy;
+ }
+
+ /**
+ * Runs the K-means++ clustering algorithm.
+ *
+ * @param points the points to cluster
+ * @param k the number of clusters to split the data into
+ * @param maxIterations the maximum number of iterations to run the algorithm
+ * for. If negative, no maximum will be used
+ * @return a list of clusters containing the points
+ */
+ public List<Cluster<T>> cluster(final Collection<T> points,
+ final int k, final int maxIterations) {
+ // create the initial clusters
+ List<Cluster<T>> clusters = chooseInitialCenters(points, k, random);
+ assignPointsToClusters(clusters, points);
+
+ // iterate through updating the centers until we're done
+ final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
+ for (int count = 0; count < max; count++) {
+ boolean clusteringChanged = false;
+ List<Cluster<T>> newClusters = new ArrayList<Cluster<T>>();
+ for (final Cluster<T> cluster : clusters) {
+ final T newCenter;
+ if (cluster.getPoints().isEmpty()) {
+ switch (emptyStrategy) {
+ case LARGEST_VARIANCE :
+ newCenter = getPointFromLargestVarianceCluster(clusters);
+ break;
+ case LARGEST_POINTS_NUMBER :
+ newCenter = getPointFromLargestNumberCluster(clusters);
+ break;
+ case FARTHEST_POINT :
+ newCenter = getFarthestPoint(clusters);
+ break;
+ default :
+ throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
+ }
+ clusteringChanged = true;
+ } else {
+ newCenter = cluster.getCenter().centroidOf(cluster.getPoints());
+ if (!newCenter.equals(cluster.getCenter())) {
+ clusteringChanged = true;
+ }
+ }
+ newClusters.add(new Cluster<T>(newCenter));
+ }
+ if (!clusteringChanged) {
+ return clusters;
+ }
+ assignPointsToClusters(newClusters, points);
+ clusters = newClusters;
+ }
+ return clusters;
+ }
+
+ /**
+ * Adds the given points to the closest {@link Cluster}.
+ *
+ * @param <T> type of the points to cluster
+ * @param clusters the {@link Cluster}s to add the points to
+ * @param points the points to add to the given {@link Cluster}s
+ */
+ private static <T extends Clusterable<T>> void
+ assignPointsToClusters(final Collection<Cluster<T>> clusters, final Collection<T> points) {
+ for (final T p : points) {
+ Cluster<T> cluster = getNearestCluster(clusters, p);
+ cluster.addPoint(p);
+ }
+ }
+
+ /**
+ * Use K-means++ to choose the initial centers.
+ *
+ * @param <T> type of the points to cluster
+ * @param points the points to choose the initial centers from
+ * @param k the number of centers to choose
+ * @param random random generator to use
+ * @return the initial centers
+ */
+ private static <T extends Clusterable<T>> List<Cluster<T>>
+ chooseInitialCenters(final Collection<T> points, final int k, final Random random) {
+
+ final List<T> pointSet = new ArrayList<T>(points);
+ final List<Cluster<T>> resultSet = new ArrayList<Cluster<T>>();
+
+ // Choose one center uniformly at random from among the data points.
+ final T firstPoint = pointSet.remove(random.nextInt(pointSet.size()));
+ resultSet.add(new Cluster<T>(firstPoint));
+
+ final double[] dx2 = new double[pointSet.size()];
+ while (resultSet.size() < k) {
+ // For each remaining point x, add D(x)^2 (squared distance to the nearest chosen
+ // center) to a running total; kept as a double so fractional parts are not truncated.
+ double sum = 0;
+ for (int i = 0; i < pointSet.size(); i++) {
+ final T p = pointSet.get(i);
+ final Cluster<T> nearest = getNearestCluster(resultSet, p);
+ final double d = p.distanceFrom(nearest.getCenter());
+ sum += d * d;
+ dx2[i] = sum;
+ }
+
+ // Add one new data point as a center. Each point x is chosen with probability
+ // proportional to D(x)^2: take the first index whose cumulative sum reaches r.
+ final double r = random.nextDouble() * sum;
+ for (int i = 0 ; i < dx2.length; i++) {
+ if (dx2[i] >= r) {
+ final T p = pointSet.remove(i);
+ resultSet.add(new Cluster<T>(p));
+ break;
+ }
+ }
+ }
+
+ return resultSet;
+
+ }
+
+ /**
+ * Get a random point from the {@link Cluster} with the largest distance variance.
+ *
+ * @param clusters the {@link Cluster}s to search
+ * @return a random point from the selected cluster
+ */
+ private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters) {
+
+ double maxVariance = Double.NEGATIVE_INFINITY;
+ Cluster<T> selected = null;
+ for (final Cluster<T> cluster : clusters) {
+ if (!cluster.getPoints().isEmpty()) {
+
+ // compute the distance variance of the current cluster
+ final T center = cluster.getCenter();
+ final Variance stat = new Variance();
+ for (final T point : cluster.getPoints()) {
+ stat.increment(point.distanceFrom(center));
+ }
+ final double variance = stat.getResult();
+
+ // select the cluster with the largest variance
+ if (variance > maxVariance) {
+ maxVariance = variance;
+ selected = cluster;
+ }
+
+ }
+ }
+
+ // did we find at least one non-empty cluster ?
+ if (selected == null) {
+ throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
+ }
+
+ // extract a random point from the cluster
+ final List<T> selectedPoints = selected.getPoints();
+ return selectedPoints.remove(random.nextInt(selectedPoints.size()));
+
+ }
+
+ /**
+ * Get a random point from the {@link Cluster} with the largest number of points
+ *
+ * @param clusters the {@link Cluster}s to search
+ * @return a random point from the selected cluster
+ */
+ private T getPointFromLargestNumberCluster(final Collection<Cluster<T>> clusters) {
+
+ int maxNumber = 0;
+ Cluster<T> selected = null;
+ for (final Cluster<T> cluster : clusters) {
+
+ // get the number of points of the current cluster
+ final int number = cluster.getPoints().size();
+
+ // select the cluster with the largest number of points
+ if (number > maxNumber) {
+ maxNumber = number;
+ selected = cluster;
+ }
+
+ }
+
+ // did we find at least one non-empty cluster ?
+ if (selected == null) {
+ throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
+ }
+
+ // extract a random point from the cluster
+ final List<T> selectedPoints = selected.getPoints();
+ return selectedPoints.remove(random.nextInt(selectedPoints.size()));
+
+ }
+
+ /**
+ * Get the point farthest to its cluster center
+ *
+ * @param clusters the {@link Cluster}s to search
+ * @return point farthest to its cluster center
+ */
+ private T getFarthestPoint(final Collection<Cluster<T>> clusters) {
+
+ double maxDistance = Double.NEGATIVE_INFINITY;
+ Cluster<T> selectedCluster = null;
+ int selectedPoint = -1;
+ for (final Cluster<T> cluster : clusters) {
+
+ // get the farthest point
+ final T center = cluster.getCenter();
+ final List<T> points = cluster.getPoints();
+ for (int i = 0; i < points.size(); ++i) {
+ final double distance = points.get(i).distanceFrom(center);
+ if (distance > maxDistance) {
+ maxDistance = distance;
+ selectedCluster = cluster;
+ selectedPoint = i;
+ }
+ }
+
+ }
+
+ // did we find at least one non-empty cluster ?
+ if (selectedCluster == null) {
+ throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
+ }
+
+ return selectedCluster.getPoints().remove(selectedPoint);
+
+ }
+
+ /**
+ * Returns the nearest {@link Cluster} to the given point
+ *
+ * @param <T> type of the points to cluster
+ * @param clusters the {@link Cluster}s to search
+ * @param point the point to find the nearest {@link Cluster} for
+ * @return the nearest {@link Cluster} to the given point
+ */
+ private static <T extends Clusterable<T>> Cluster<T>
+ getNearestCluster(final Collection<Cluster<T>> clusters, final T point) {
+ double minDistance = Double.MAX_VALUE;
+ Cluster<T> minCluster = null;
+ for (final Cluster<T> c : clusters) {
+ final double distance = point.distanceFrom(c.getCenter());
+ if (distance < minDistance) {
+ minDistance = distance;
+ minCluster = c;
+ }
+ }
+ return minCluster;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/clustering/package.html b/src/main/java/org/apache/commons/math/stat/clustering/package.html
new file mode 100644
index 0000000..21e9079
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/clustering/package.html
@@ -0,0 +1,20 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 770979 $ $Date: 2009-05-02 21:34:51 +0200 (sam. 02 mai 2009) $ -->
+ <body>Clustering algorithms</body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/correlation/Covariance.java b/src/main/java/org/apache/commons/math/stat/correlation/Covariance.java
new file mode 100644
index 0000000..393a02d
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/correlation/Covariance.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.BlockRealMatrix;
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+
+/**
+ * Computes covariances for pairs of arrays or columns of a matrix.
+ *
+ * <p>The constructors that take <code>RealMatrix</code> or
+ * <code>double[][]</code> arguments generate covariance matrices. The
+ * columns of the input matrices are assumed to represent variable values.</p>
+ *
+ * <p>The constructor argument <code>biasCorrected</code> determines whether or
+ * not computed covariances are bias-corrected.</p>
+ *
+ * <p>Unbiased covariances are given by the formula</p>
+ * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
+ * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
+ * is the mean of the <code>Y</code> values.
+ *
+ * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
+ *
+ * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
+ * @since 2.0
+ */
+public class Covariance {
+
+ /** covariance matrix */
+ private final RealMatrix covarianceMatrix;
+
+ /**
+ * Create an empty covariance matrix.
+ */
+ /** Number of observations (length of covariate vectors) */
+ private final int n;
+
+ /**
+ * Create a Covariance with no data
+ */
+ public Covariance() {
+ super();
+ covarianceMatrix = null;
+ n = 0;
+ }
+
+ /**
+ * Create a Covariance matrix from a rectangular array
+ * whose columns represent covariates.
+ *
+ * <p>The <code>biasCorrected</code> parameter determines whether or not
+ * covariance estimates are bias-corrected.</p>
+ *
+ * <p>The input array must be rectangular with at least two columns
+ * and two rows.</p>
+ *
+ * @param data rectangular array with columns representing covariates
+ * @param biasCorrected true means covariances are bias-corrected
+ * @throws IllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and two columns.
+ */
+ public Covariance(double[][] data, boolean biasCorrected) {
+ this(new BlockRealMatrix(data), biasCorrected);
+ }
+
+ /**
+ * Create a Covariance matrix from a rectangular array
+ * whose columns represent covariates.
+ *
+ * <p>The input array must be rectangular with at least two columns
+ * and two rows</p>
+ *
+ * @param data rectangular array with columns representing covariates
+ * @throws IllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and two columns.
+ */
+ public Covariance(double[][] data) {
+ this(data, true);
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns
+ * represent covariates.
+ *
+ * <p>The <code>biasCorrected</code> parameter determines whether or not
+ * covariance estimates are bias-corrected.</p>
+ *
+ * <p>The matrix must have at least two columns and two rows</p>
+ *
+ * @param matrix matrix with columns representing covariates
+ * @param biasCorrected true means covariances are bias-corrected
+ * @throws IllegalArgumentException if the input matrix does not have
+ * at least two rows and two columns
+ */
+ public Covariance(RealMatrix matrix, boolean biasCorrected) {
+ checkSufficientData(matrix);
+ n = matrix.getRowDimension();
+ covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns
+ * represent covariates.
+ *
+ * <p>The matrix must have at least two columns and two rows</p>
+ *
+ * @param matrix matrix with columns representing covariates
+ * @throws IllegalArgumentException if the input matrix does not have
+ * at least two rows and two columns
+ */
+ public Covariance(RealMatrix matrix) {
+ this(matrix, true);
+ }
+
+ /**
+ * Returns the covariance matrix
+ *
+ * @return covariance matrix
+ */
+ public RealMatrix getCovarianceMatrix() {
+ return covarianceMatrix;
+ }
+
+ /**
+ * Returns the number of observations (length of covariate vectors)
+ *
+ * @return number of observations
+ */
+
+ public int getN() {
+ return n;
+ }
+
+ /**
+ * Compute a covariance matrix from a matrix whose columns represent
+ * covariates.
+ * @param matrix input matrix (must have at least two columns and two rows)
+ * @param biasCorrected determines whether or not covariance estimates are bias-corrected
+ * @return covariance matrix
+ */
+ protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
+ int dimension = matrix.getColumnDimension();
+ Variance variance = new Variance(biasCorrected);
+ RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
+ for (int i = 0; i < dimension; i++) {
+ for (int j = 0; j < i; j++) {
+ double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
+ outMatrix.setEntry(i, j, cov);
+ outMatrix.setEntry(j, i, cov);
+ }
+ outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Create a covariance matrix from a matrix whose columns represent
+ * covariates. Covariances are computed using the bias-corrected formula.
+ * @param matrix input matrix (must have at least two columns and two rows)
+ * @return covariance matrix
+ * @see #Covariance
+ */
+ protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
+ return computeCovarianceMatrix(matrix, true);
+ }
+
+ /**
+ * Compute a covariance matrix from a rectangular array whose columns represent
+ * covariates.
+ * @param data input array (must have at least two columns and two rows)
+ * @param biasCorrected determines whether or not covariance estimates are bias-corrected
+ * @return covariance matrix
+ */
+ protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
+ return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
+ }
+
+ /**
+ * Create a covariance matrix from a rectangular array whose columns represent
+ * covariates. Covariances are computed using the bias-corrected formula.
+ * @param data input array (must have at least two columns and two rows)
+ * @return covariance matrix
+ * @see #Covariance
+ */
+ protected RealMatrix computeCovarianceMatrix(double[][] data) {
+ return computeCovarianceMatrix(data, true);
+ }
+
+ /**
+ * Computes the covariance between the two arrays.
+ *
+ * <p>Array lengths must match and the common length must be at least 2.</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @param biasCorrected if true, returned value will be bias-corrected
+ * @return returns the covariance for the two arrays
+ * @throws IllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
+ throws IllegalArgumentException {
+ Mean mean = new Mean();
+ double result = 0d;
+ int length = xArray.length;
+ if (length != yArray.length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
+ } else if (length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
+ } else {
+ double xMean = mean.evaluate(xArray);
+ double yMean = mean.evaluate(yArray);
+ for (int i = 0; i < length; i++) {
+ double xDev = xArray[i] - xMean;
+ double yDev = yArray[i] - yMean;
+ result += (xDev * yDev - result) / (i + 1);
+ }
+ }
+ return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
+ }
+
+ /**
+ * Computes the covariance between the two arrays, using the bias-corrected
+ * formula.
+ *
+ * <p>Array lengths must match and the common length must be at least 2.</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return returns the covariance for the two arrays
+ * @throws IllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double covariance(final double[] xArray, final double[] yArray)
+ throws IllegalArgumentException {
+ return covariance(xArray, yArray, true);
+ }
+
+ /**
+ * Throws IllegalArgumentException if the matrix does not have at least
+ * two columns and two rows
+ * @param matrix matrix to check
+ */
+ private void checkSufficientData(final RealMatrix matrix) {
+ int nRows = matrix.getRowDimension();
+ int nCols = matrix.getColumnDimension();
+ if (nRows < 2 || nCols < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
+ nRows, nCols);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java b/src/main/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
new file mode 100644
index 0000000..6467c69
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.TDistribution;
+import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.exception.DimensionMismatchException;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.BlockRealMatrix;
+import org.apache.commons.math.stat.regression.SimpleRegression;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Computes Pearson's product-moment correlation coefficients for pairs of arrays
+ * or columns of a matrix.
+ *
+ * <p>The constructors that take <code>RealMatrix</code> or
+ * <code>double[][]</code> arguments generate correlation matrices. The
+ * columns of the input matrices are assumed to represent variable values.
+ * Correlations are given by the formula</p>
+ * <code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
+ * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
+ * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
+ *
+ * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
+ * @since 2.0
+ */
+public class PearsonsCorrelation {
+
+ /** correlation matrix */
+ private final RealMatrix correlationMatrix;
+
+ /** number of observations */
+ private final int nObs;
+
+ /**
+ * Create a PearsonsCorrelation instance without data
+ */
+ public PearsonsCorrelation() {
+ super();
+ correlationMatrix = null;
+ nObs = 0;
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a rectangular array
+ * whose columns represent values of variables to be correlated.
+ *
+ * @param data rectangular array with columns representing variables
+ * @throws IllegalArgumentException if the input data array is not
+ * rectangular with at least two rows and two columns.
+ */
+ public PearsonsCorrelation(double[][] data) {
+ this(new BlockRealMatrix(data));
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a RealMatrix whose columns
+ * represent variables to be correlated.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ */
+ public PearsonsCorrelation(RealMatrix matrix) {
+ checkSufficientData(matrix);
+ nObs = matrix.getRowDimension();
+ correlationMatrix = computeCorrelationMatrix(matrix);
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a {@link Covariance}. The correlation
+ * matrix is computed by scaling the Covariance's covariance matrix.
+ * The Covariance instance must have been created from a data matrix with
+ * columns representing variable values.
+ *
+ * @param covariance Covariance instance
+ */
+ public PearsonsCorrelation(Covariance covariance) {
+ RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
+ if (covarianceMatrix == null) {
+ throw new NullArgumentException(LocalizedFormats.COVARIANCE_MATRIX);
+ }
+ nObs = covariance.getN();
+ correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+ }
+
+ /**
+ * Create a PearsonsCorrelation from a covariance matrix. The correlation
+ * matrix is computed by scaling the covariance matrix.
+ *
+ * @param covarianceMatrix covariance matrix
+ * @param numberOfObservations the number of observations in the dataset used to compute
+ * the covariance matrix
+ */
+ public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
+ nObs = numberOfObservations;
+ correlationMatrix = covarianceToCorrelation(covarianceMatrix);
+
+ }
+
+ /**
+ * Returns the correlation matrix
+ *
+ * @return correlation matrix
+ */
+ public RealMatrix getCorrelationMatrix() {
+ return correlationMatrix;
+ }
+
+ /**
+ * Returns a matrix of standard errors associated with the estimates
+ * in the correlation matrix.<br/>
+ * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
+ * error associated with <code>getCorrelationMatrix().getEntry(i,j)</code>
+ * <p>The formula used to compute the standard error is <br/>
+ * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
+ * where <code>r</code> is the estimated correlation coefficient and
+ * <code>n</code> is the number of observations in the source dataset.</p>
+ *
+ * @return matrix of correlation standard errors
+ */
+ public RealMatrix getCorrelationStandardErrors() {
+ int nVars = correlationMatrix.getColumnDimension();
+ double[][] out = new double[nVars][nVars];
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < nVars; j++) {
+ double r = correlationMatrix.getEntry(i, j);
+ out[i][j] = FastMath.sqrt((1 - r * r) /(nObs - 2));
+ }
+ }
+ return new BlockRealMatrix(out);
+ }
+
+ /**
+ * Returns a matrix of p-values associated with the (two-sided) null
+ * hypothesis that the corresponding correlation coefficient is zero.
+ * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
+ * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
+ * a value with absolute value greater than or equal to <br>
+ * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
+ * <p>The values in the matrix are sometimes referred to as the
+ * <i>significance</i> of the corresponding correlation coefficients.</p>
+ *
+ * @return matrix of p-values
+ * @throws MathException if an error occurs estimating probabilities
+ */
+ public RealMatrix getCorrelationPValues() throws MathException {
+ TDistribution tDistribution = new TDistributionImpl(nObs - 2);
+ int nVars = correlationMatrix.getColumnDimension();
+ double[][] out = new double[nVars][nVars];
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < nVars; j++) {
+ if (i == j) {
+ out[i][j] = 0d;
+ } else {
+ double r = correlationMatrix.getEntry(i, j);
+ double t = FastMath.abs(r * FastMath.sqrt((nObs - 2)/(1 - r * r)));
+ out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
+ }
+ }
+ }
+ return new BlockRealMatrix(out);
+ }
+
+
+ /**
+ * Computes the correlation matrix for the columns of the
+ * input matrix.
+ *
+ * @param matrix matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+ int nVars = matrix.getColumnDimension();
+ RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
+ for (int i = 0; i < nVars; i++) {
+ for (int j = 0; j < i; j++) {
+ double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
+ outMatrix.setEntry(i, j, corr);
+ outMatrix.setEntry(j, i, corr);
+ }
+ outMatrix.setEntry(i, i, 1d);
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Computes the correlation matrix for the columns of the
+ * input rectangular array. The columns of the array represent values
+ * of variables to be correlated.
+ *
+ * @param data matrix with columns representing variables to correlate
+ * @return correlation matrix
+ */
+ public RealMatrix computeCorrelationMatrix(double[][] data) {
+ return computeCorrelationMatrix(new BlockRealMatrix(data));
+ }
+
+ /**
+ * Computes the Pearson's product-moment correlation coefficient between the two arrays.
+ *
+ * <p>Throws IllegalArgumentException if the arrays do not have the same length
+ * or their common length is less than 2</p>
+ *
+ * @param xArray first data array
+ * @param yArray second data array
+ * @return Returns Pearson's correlation coefficient for the two arrays
+ * @throws IllegalArgumentException if the arrays lengths do not match or
+ * there is insufficient data
+ */
+ public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
+ SimpleRegression regression = new SimpleRegression();
+ if (xArray.length != yArray.length) {
+ throw new DimensionMismatchException(xArray.length, yArray.length);
+ } else if (xArray.length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2);
+ } else {
+ for(int i=0; i<xArray.length; i++) {
+ regression.addData(xArray[i], yArray[i]);
+ }
+ return regression.getR();
+ }
+ }
+
+ /**
+ * Derives a correlation matrix from a covariance matrix.
+ *
+ * <p>Uses the formula <br/>
+ * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
+ * <code>r(&middot;,&middot;)</code> is the correlation coefficient and
+ * <code>s(&middot;)</code> means standard deviation.</p>
+ *
+ * @param covarianceMatrix the covariance matrix
+ * @return correlation matrix
+ */
+ public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
+ int nVars = covarianceMatrix.getColumnDimension();
+ RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
+ for (int i = 0; i < nVars; i++) {
+ double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i));
+ outMatrix.setEntry(i, i, 1d);
+ for (int j = 0; j < i; j++) {
+ double entry = covarianceMatrix.getEntry(i, j) /
+ (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j)));
+ outMatrix.setEntry(i, j, entry);
+ outMatrix.setEntry(j, i, entry);
+ }
+ }
+ return outMatrix;
+ }
+
+ /**
+ * Throws IllegalArgumentException if the matrix does not have at least
+ * two columns and two rows
+ *
+ * @param matrix matrix to check for sufficiency
+ */
+ private void checkSufficientData(final RealMatrix matrix) {
+ int nRows = matrix.getRowDimension();
+ int nCols = matrix.getColumnDimension();
+ if (nRows < 2 || nCols < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
+ nRows, nCols);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java b/src/main/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
new file mode 100644
index 0000000..fe121fe
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/correlation/SpearmansCorrelation.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.correlation;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.linear.BlockRealMatrix;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.stat.ranking.NaturalRanking;
+import org.apache.commons.math.stat.ranking.RankingAlgorithm;
+
+/**
+ * <p>Spearman's rank correlation. This implementation performs a rank
+ * transformation on the input data and then computes {@link PearsonsCorrelation}
+ * on the ranked data.</p>
+ *
+ * <p>By default, ranks are computed using {@link NaturalRanking} with default
+ * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
+ * The ranking algorithm can be set using a constructor argument.</p>
+ *
+ * @since 2.0
+ * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
+ */
+
+public class SpearmansCorrelation {
+
+ /** Input data */
+ private final RealMatrix data;
+
+ /** Ranking algorithm */
+ private final RankingAlgorithm rankingAlgorithm;
+
+ /** Rank correlation */
+ private final PearsonsCorrelation rankCorrelation;
+
+ /**
+ * Create a SpearmansCorrelation with the given input data matrix
+ * and ranking algorithm.
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ * @param rankingAlgorithm ranking algorithm
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
+ this.data = dataMatrix.copy();
+ this.rankingAlgorithm = rankingAlgorithm;
+ rankTransform(data);
+ rankCorrelation = new PearsonsCorrelation(data);
+ }
+
+ /**
+ * Create a SpearmansCorrelation from the given data matrix.
+ * <p>
+ * Delegates to the two-argument constructor, passing a newly created
+ * {@link NaturalRanking} as the ranking algorithm.</p>
+ *
+ * @param dataMatrix matrix of data with columns representing
+ * variables to correlate
+ */
+ public SpearmansCorrelation(final RealMatrix dataMatrix) {
+ this(dataMatrix, new NaturalRanking());
+ }
+
+ /**
+  * Create a SpearmansCorrelation without data.
+  * <p>
+  * The ranking algorithm is a newly created {@link NaturalRanking}. No data
+  * matrix and no rank correlation are stored, so
+  * {@link #getRankCorrelation()} returns <code>null</code> and
+  * {@link #getCorrelationMatrix()} cannot be used on such an instance; the
+  * <code>computeCorrelationMatrix</code> and <code>correlation</code>
+  * methods take their data as arguments instead.</p>
+  */
+ public SpearmansCorrelation() {
+     this.rankingAlgorithm = new NaturalRanking();
+     this.data = null;
+     this.rankCorrelation = null;
+ }
+
+ /**
+  * Returns the Spearman rank correlation matrix, obtained from the
+  * {@link PearsonsCorrelation} built on the ranked data.
+  * <p>
+  * An instance created with the no-argument constructor holds no rank
+  * correlation, and calling this method on it fails with a
+  * NullPointerException.</p>
+  *
+  * @return Spearman Rank Correlation Matrix
+  */
+ public RealMatrix getCorrelationMatrix() {
+     final PearsonsCorrelation pearsonOnRanks = rankCorrelation;
+     return pearsonOnRanks.getCorrelationMatrix();
+ }
+
+ /**
+ * Returns a {@link PearsonsCorrelation} instance constructed from the
+ * ranked input data. That is,
+ * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
+ * is equivalent to
+ * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
+ * <code>rankTransform(matrix)</code> is the result of applying the
+ * configured <code>RankingAlgorithm</code> to each of the columns of
+ * <code>matrix.</code>
+ * <p>
+ * The returned value is <code>null</code> when this instance was created
+ * with the no-argument constructor.</p>
+ *
+ * @return PearsonsCorrelation among ranked column data (may be null)
+ */
+ public PearsonsCorrelation getRankCorrelation() {
+ return rankCorrelation;
+ }
+
+ /**
+  * Computes the Spearman's rank correlation matrix for the columns of the
+  * input matrix.
+  * <p>
+  * The ranking is applied to the matrix returned by
+  * <code>matrix.copy()</code>, and the Pearson correlation matrix of the
+  * ranked copy is returned.</p>
+  *
+  * @param matrix matrix with columns representing variables to correlate
+  * @return correlation matrix
+  */
+ public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
+     final RealMatrix ranked = matrix.copy();
+     rankTransform(ranked);
+     final PearsonsCorrelation pearson = new PearsonsCorrelation();
+     return pearson.computeCorrelationMatrix(ranked);
+ }
+
+ /**
+  * Computes the Spearman's rank correlation matrix for the columns of the
+  * input rectangular array. The columns of the array represent values
+  * of variables to be correlated.
+  * <p>
+  * The array is wrapped in a {@link BlockRealMatrix} and handed to
+  * {@link #computeCorrelationMatrix(RealMatrix)}.</p>
+  *
+  * @param matrix matrix with columns representing variables to correlate
+  * @return correlation matrix
+  */
+ public RealMatrix computeCorrelationMatrix(double[][] matrix) {
+     final RealMatrix wrapped = new BlockRealMatrix(matrix);
+     return computeCorrelationMatrix(wrapped);
+ }
+
+ /**
+  * Computes the Spearman's rank correlation coefficient between the two arrays.
+  *
+  * <p>Both arrays are ranked with the configured ranking algorithm and the
+  * Pearson correlation of the two rank arrays is returned.</p>
+  *
+  * <p>Throws IllegalArgumentException if the arrays do not have the same length
+  * or their common length is less than 2.</p>
+  *
+  * @param xArray first data array
+  * @param yArray second data array
+  * @return Returns Spearman's rank correlation coefficient for the two arrays
+  * @throws IllegalArgumentException if the arrays lengths do not match or
+  * there is insufficient data
+  */
+ public double correlation(final double[] xArray, final double[] yArray)
+     throws IllegalArgumentException {
+     final int xLength = xArray.length;
+     if (xLength != yArray.length) {
+         throw MathRuntimeException.createIllegalArgumentException(
+                 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, xLength, yArray.length);
+     }
+     if (xLength < 2) {
+         throw MathRuntimeException.createIllegalArgumentException(
+                 LocalizedFormats.INSUFFICIENT_DIMENSION, xLength, 2);
+     }
+     final double[] xRanks = rankingAlgorithm.rank(xArray);
+     final double[] yRanks = rankingAlgorithm.rank(yArray);
+     return new PearsonsCorrelation().correlation(xRanks, yRanks);
+ }
+
+ /**
+ * Applies rank transform to each of the columns of <code>matrix</code>
+ * using the current <code>rankingAlgorithm</code>
+ *
+ * @param matrix matrix to transform
+ */
+ private void rankTransform(RealMatrix matrix) {
+ for (int i = 0; i < matrix.getColumnDimension(); i++) {
+ matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/correlation/package.html b/src/main/java/org/apache/commons/math/stat/correlation/package.html
new file mode 100644
index 0000000..8b12fc2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/correlation/package.html
@@ -0,0 +1,22 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 744716 $ $Date: 2009-02-15 19:38:49 +0100 (dim. 15 févr. 2009) $ -->
+ <body>
+ Correlations/Covariance computations.
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AbstractStorelessUnivariateStatistic.java b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractStorelessUnivariateStatistic.java
new file mode 100644
index 0000000..9e721ea
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractStorelessUnivariateStatistic.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.util.MathUtils;
+
+/**
+ *
+ * Abstract implementation of the {@link StorelessUnivariateStatistic} interface.
+ * <p>
+ * Provides default <code>evaluate()</code> and <code>incrementAll(double[])</code>
+ * implementations.</p>
+ * <p>
+ * <strong>Note that these implementations are not synchronized.</strong></p>
+ *
+ * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
+ */
+public abstract class AbstractStorelessUnivariateStatistic
+ extends AbstractUnivariateStatistic
+ implements StorelessUnivariateStatistic {
+
+ /**
+  * Evaluates the statistic over the whole input array by delegating to
+  * {@link #evaluate(double[], int, int)} with <code>begin = 0</code> and
+  * <code>length = values.length</code>.
+  * <p>
+  * Note that this implementation changes the internal state of the
+  * statistic: for a non-empty array its side effects are those of
+  * {@link #clear} followed by {@link #incrementAll(double[])}.</p>
+  * <p>
+  * Implementations may override this method with a more efficient and
+  * possibly more accurate implementation that works directly with the
+  * input array.</p>
+  * <p>
+  * If the array is null, a NullArgumentException is thrown.</p>
+  * @param values input array
+  * @return the value of the statistic applied to the input array
+  * @see org.apache.commons.math.stat.descriptive.UnivariateStatistic#evaluate(double[])
+  */
+ @Override
+ public double evaluate(final double[] values) {
+     if (values != null) {
+         return evaluate(values, 0, values.length);
+     }
+     throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+
+ /**
+  * Evaluates the statistic over the designated portion of the input array.
+  * <p>
+  * When {@link #test(double[], int, int)} accepts the arguments, this
+  * default implementation calls {@link #clear}, feeds the designated
+  * entries through {@link #incrementAll(double[], int, int)} and returns
+  * {@link #getResult}. When <code>test</code> returns <code>false</code>,
+  * the statistic is left untouched and its current {@link #getResult}
+  * value is returned.</p>
+  * <p>
+  * Note that this implementation changes the internal state of the
+  * statistic whenever the arguments are accepted.</p>
+  * <p>
+  * Implementations may override this method with a more efficient and
+  * possibly more accurate implementation that works directly with the
+  * input array.</p>
+  * <p>
+  * If the array is null or the index parameters are not valid, the
+  * exception raised by <code>test</code> is propagated.</p>
+  * @param values the input array
+  * @param begin the index of the first element to include
+  * @param length the number of elements to include
+  * @return the value of the statistic applied to the included array entries
+  * @see org.apache.commons.math.stat.descriptive.UnivariateStatistic#evaluate(double[], int, int)
+  */
+ @Override
+ public double evaluate(final double[] values, final int begin, final int length) {
+     final boolean hasInput = test(values, begin, length);
+     if (!hasInput) {
+         return getResult();
+     }
+     clear();
+     incrementAll(values, begin, length);
+     return getResult();
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Redeclared here so that the declared return type is
+ * <code>StorelessUnivariateStatistic</code>.</p>
+ */
+ @Override
+ public abstract StorelessUnivariateStatistic copy();
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Called by <code>evaluate(double[], int, int)</code> before the input
+ * values are accumulated.</p>
+ */
+ public abstract void clear();
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * This value is what the <code>evaluate</code> methods of this class
+ * return, and it is also used by <code>equals</code> and
+ * <code>hashCode</code>.</p>
+ */
+ public abstract double getResult();
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Called once per included array element by
+ * <code>incrementAll(double[], int, int)</code>.</p>
+ */
+ public abstract void increment(final double d);
+
+ /**
+  * Adds every element of the input array to the statistic by delegating to
+  * {@link #incrementAll(double[], int, int)} over the whole array.
+  * <p>
+  * Throws a NullArgumentException if the input values array is null.</p>
+  *
+  * @param values values to add
+  * @throws IllegalArgumentException if values is null
+  * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#incrementAll(double[])
+  */
+ public void incrementAll(double[] values) {
+     if (values == null) {
+         throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+     }
+     final int count = values.length;
+     incrementAll(values, 0, count);
+ }
+
+ /**
+  * Adds the designated portion of the input array to the statistic by
+  * calling {@link #increment} once per element, in index order.
+  * <p>
+  * The arguments are first checked with
+  * {@link #test(double[], int, int)}; nothing is added when that check
+  * returns <code>false</code>, and any exception it raises (for example
+  * for a null array) is propagated.</p>
+  *
+  * @param values array holding values to add
+  * @param begin index of the first array element to add
+  * @param length number of array elements to add
+  * @throws IllegalArgumentException if values is null
+  * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#incrementAll(double[], int, int)
+  */
+ public void incrementAll(double[] values, int begin, int length) {
+     if (!test(values, begin, length)) {
+         return;
+     }
+     final int end = begin + length;
+     int index = begin;
+     while (index < end) {
+         increment(values[index]);
+         index++;
+     }
+ }
+
+ /**
+  * Returns true iff <code>object</code> is an
+  * <code>AbstractStorelessUnivariateStatistic</code> returning the same
+  * values as this for <code>getResult()</code> and <code>getN()</code>.
+  * Both values are compared with <code>MathUtils.equalsIncludingNaN</code>.
+  * @param object object to test equality against.
+  * @return true if object returns the same value as this
+  */
+ @Override
+ public boolean equals(Object object) {
+     if (this == object) {
+         return true;
+     }
+     if (!(object instanceof AbstractStorelessUnivariateStatistic)) {
+         return false;
+     }
+     final AbstractStorelessUnivariateStatistic other =
+         (AbstractStorelessUnivariateStatistic) object;
+     if (!MathUtils.equalsIncludingNaN(other.getResult(), this.getResult())) {
+         return false;
+     }
+     return MathUtils.equalsIncludingNaN(other.getN(), this.getN());
+ }
+
+ /**
+ * Returns hash code based on getResult() and getN()
+ *
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ return 31* (31 + MathUtils.hash(getResult())) + MathUtils.hash(getN());
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java
new file mode 100644
index 0000000..354dee6
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/AbstractUnivariateStatistic.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.DimensionMismatchException;
+import org.apache.commons.math.exception.NotPositiveException;
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+
+/**
+ * Abstract base class for all implementations of the
+ * {@link UnivariateStatistic} interface.
+ * <p>
+ * Provides a default implementation of <code>evaluate(double[]),</code>
+ * delegating to <code>evaluate(double[], int, int)</code> in the natural way.
+ * </p>
+ * <p>
+ * Also includes a <code>test</code> method that performs generic parameter
+ * validation for the <code>evaluate</code> methods.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public abstract class AbstractUnivariateStatistic
+ implements UnivariateStatistic {
+
+ /** Stored data. */
+ private double[] storedData;
+
+ /**
+  * Stores the data array to be used by {@link #evaluate()}.
+  * <p>
+  * A clone of the parameter array is stored, not the array itself.
+  * Passing <code>null</code> removes any stored data.
+  * </p>
+  * @param values data array to store (may be null to remove stored data)
+  * @see #evaluate()
+  */
+ public void setData(final double[] values) {
+     if (values == null) {
+         storedData = null;
+     } else {
+         storedData = values.clone();
+     }
+ }
+
+ /**
+  * Returns a clone of the stored data array, so the caller cannot modify
+  * the stored data through the result.
+  * @return copy of the stored data array (may be null)
+  */
+ public double[] getData() {
+     final double[] current = storedData;
+     if (current == null) {
+         return null;
+     }
+     return current.clone();
+ }
+
+ /**
+ * Get a reference to the stored data array.
+ * <p>
+ * No copy is made: the returned array is the stored array itself, so
+ * changes made through it are visible in the stored data. Use
+ * {@link #getData()} to obtain an independent copy.
+ * </p>
+ * @return reference to the stored data array (may be null)
+ */
+ protected double[] getDataRef() {
+ return storedData;
+ }
+
+ /**
+  * Stores a copy of the designated portion of the data array.
+  * <p>
+  * The copy is assembled in a temporary array and installed only after it
+  * has been filled successfully. A failing call (null array, negative
+  * length, or a range that does not fit inside <code>values</code>)
+  * therefore propagates the underlying runtime exception and leaves any
+  * previously stored data untouched.
+  * </p>
+  * @param values data array to store
+  * @param begin the index of the first element to include
+  * @param length the number of elements to include
+  * @see #evaluate()
+  */
+ public void setData(final double[] values, final int begin, final int length) {
+     final double[] copy = new double[length];
+     System.arraycopy(values, begin, copy, 0, length);
+     // Publish only once the copy succeeded; assigning first would replace
+     // the stored data with a zero-filled array whenever arraycopy throws.
+     storedData = copy;
+ }
+
+ /**
+ * Returns the result of evaluating the statistic over the stored data.
+ * <p>
+ * The stored array is the one which was set by previous calls to
+ * {@link #setData(double[])} or {@link #setData(double[], int, int)}; it is
+ * passed as-is (possibly null) to {@link #evaluate(double[])}.
+ * </p>
+ * @return the value of the statistic applied to the stored data
+ */
+ public double evaluate() {
+ return evaluate(storedData);
+ }
+
+ /**
+  * {@inheritDoc}
+  * <p>
+  * Delegates to {@link #evaluate(double[], int, int)} over the whole array.
+  * </p>
+  */
+ public double evaluate(final double[] values) {
+     // Called with an empty range purely for its null check: with begin and
+     // length both 0, the only check that can fail is the one for a null array.
+     test(values, 0, 0);
+     final int count = values.length;
+     return evaluate(values, 0, count);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Left abstract here; <code>evaluate(double[])</code> in this class
+ * delegates to this method with <code>begin = 0</code> and
+ * <code>length = values.length</code>.</p>
+ */
+ public abstract double evaluate(final double[] values, final int begin, final int length);
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Left abstract here; each concrete statistic supplies its own copy.</p>
+ */
+ public abstract UnivariateStatistic copy();
+
+ /**
+  * This method is used by <code>evaluate(double[], int, int)</code> methods
+  * to verify that the input parameters designate a subarray of positive length.
+  * <p>
+  * <ul>
+  * <li>returns <code>true</code> iff the parameters designate a subarray of
+  * positive length</li>
+  * <li>throws <code>IllegalArgumentException</code> if the array is null
+  * or the indices are invalid (negative <code>begin</code>, negative
+  * <code>length</code>, or a subarray that extends past the end of the
+  * array)</li>
+  * <li>returns <code>false</code> if the array is non-null, but
+  * <code>length</code> is 0.</li>
+  * </ul></p>
+  *
+  * @param values the input array
+  * @param begin index of the first array element to include
+  * @param length the number of elements to include
+  * @return true if the parameters are valid and designate a subarray of positive length
+  * @throws IllegalArgumentException if the indices are invalid or the array is null
+  */
+ protected boolean test(
+     final double[] values,
+     final int begin,
+     final int length) {
+
+     if (values == null) {
+         throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+     }
+
+     if (begin < 0) {
+         throw new NotPositiveException(LocalizedFormats.START_POSITION, begin);
+     }
+
+     if (length < 0) {
+         throw new NotPositiveException(LocalizedFormats.LENGTH, length);
+     }
+
+     // Compare against the remaining capacity rather than computing
+     // begin + length: that sum can overflow int, wrap to a negative value
+     // and let an out-of-range subarray through. Both operands are known to
+     // be non-negative here, so the subtraction cannot overflow.
+     if (length > values.length - begin) {
+         throw MathRuntimeException.createIllegalArgumentException(
+                 LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END);
+     }
+
+     return length != 0;
+
+ }
+
+ /**
+ * This method is used by <code>evaluate(double[], double[], int, int)</code> methods
+ * to verify that the begin and length parameters designate a subarray of positive length
+ * and the weights are all non-negative, non-NaN, finite, and not all zero.
+ * <p>
+ * <ul>
+ * <li>returns <code>true</code> iff the parameters designate a subarray of
+ * positive length and the weights array contains legitimate values.</li>
+ * <li>throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * <li>the start and length arguments do not determine a valid array</li></ul>
+ * </li>
+ * <li>returns <code>false</li> if the array is non-null, but
+ * <code>length</code> is 0.
+ * </ul></p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return true if the parameters are valid and designate a subarray of positive length
+ * @throws IllegalArgumentException if the indices are invalid or the array is null
+ * @since 2.1
+ */
+ protected boolean test(
+ final double[] values,
+ final double[] weights,
+ final int begin,
+ final int length) {
+
+ if (weights == null) {
+ throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+
+ if (weights.length != values.length) {
+ throw new DimensionMismatchException(weights.length, values.length);
+ }
+
+ boolean containsPositiveWeight = false;
+ for (int i = begin; i < begin + length; i++) {
+ if (Double.isNaN(weights[i])) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NAN_ELEMENT_AT_INDEX, i);
+ }
+ if (Double.isInfinite(weights[i])) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INFINITE_ARRAY_ELEMENT, weights[i], i);
+ }
+ if (weights[i] < 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX, i, weights[i]);
+ }
+ if (!containsPositiveWeight && weights[i] > 0.0) {
+ containsPositiveWeight = true;
+ }
+ }
+
+ if (!containsPositiveWeight) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.WEIGHT_AT_LEAST_ONE_NON_ZERO);
+ }
+
+ return test(values, begin, length);
+ }
+}
+
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
new file mode 100644
index 0000000..98c58c8
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * <p>
+ * An aggregator for {@code SummaryStatistics} from several data sets or
+ * data set partitions. In its simplest usage mode, the client creates an
+ * instance via the zero-argument constructor, then uses
+ * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
+ * for each individual data set / partition. The per-set statistics objects
+ * are used as normal, and at any time the aggregate statistics for all the
+ * contributors can be obtained from this object.
+ * </p><p>
+ * Clients with specialized requirements can use alternative constructors to
+ * control the statistics implementations and initial values used by the
+ * contributing and the internal aggregate {@code SummaryStatistics} objects.
+ * </p><p>
+ * A static {@link #aggregate(Collection)} method is also included that computes
+ * aggregate statistics directly from a Collection of SummaryStatistics instances.
+ * </p><p>
+ * When {@link #createContributingStatistics()} is used to create SummaryStatistics
+ * instances to be aggregated concurrently, the created instances'
+ * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
+ * instance maintained by this class. In multithreaded environments, if the functionality
+ * provided by {@link #aggregate(Collection)} is adequate, that method should be used
+ * to avoid unnecessary computation and synchronization delays.</p>
+ *
+ * @since 2.0
+ * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $
+ *
+ */
+public class AggregateSummaryStatistics implements StatisticalSummary,
+ Serializable {
+
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -8207112444016386906L;
+
+ /**
+ * A SummaryStatistics serving as a prototype for creating SummaryStatistics
+ * contributing to this aggregate
+ */
+ private final SummaryStatistics statisticsPrototype;
+
+ /**
+ * The SummaryStatistics in which aggregate statistics are accumulated.
+ */
+ private final SummaryStatistics statistics;
+
+ /**
+ * Initializes a new AggregateSummaryStatistics with default statistics
+ * implementations.
+ * <p>
+ * Delegates to the single-argument constructor, passing a newly created
+ * {@code SummaryStatistics} as the prototype.</p>
+ *
+ */
+ public AggregateSummaryStatistics() {
+ this(new SummaryStatistics());
+ }
+
+ /**
+ * Initializes a new AggregateSummaryStatistics with the specified statistics
+ * object as a prototype for contributing statistics and for the internal
+ * aggregate statistics. This provides for customized statistics implementations
+ * to be used by contributing and aggregate statistics.
+ * <p>
+ * Delegates to the two-argument constructor: the internal aggregate is
+ * {@code new SummaryStatistics(prototypeStatistics)}, or {@code null}
+ * (replaced there by a default object) when the prototype is {@code null}.</p>
+ *
+ * @param prototypeStatistics a {@code SummaryStatistics} serving as a
+ * prototype both for the internal aggregate statistics and for
+ * contributing statistics obtained via the
+ * {@code createContributingStatistics()} method. Being a prototype
+ * means that other objects are initialized by copying this object's state.
+ * If {@code null}, a new, default statistics object is used. Any statistic
+ * values in the prototype are propagated to contributing statistics
+ * objects and (once) into these aggregate statistics.
+ * @see #createContributingStatistics()
+ */
+ public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
+ this(prototypeStatistics,
+ prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
+ }
+
+ /**
+  * Initializes a new AggregateSummaryStatistics with one statistics object
+  * serving as the prototype for contributing statistics and another one
+  * serving as the internal aggregate statistics. This provides for different
+  * statistics implementations to be used by contributing and aggregate
+  * statistics and for an initial state to be supplied for the aggregate
+  * statistics.
+  *
+  * @param prototypeStatistics a {@code SummaryStatistics} serving as a
+  *      prototype for contributing statistics obtained via the
+  *      {@code createContributingStatistics()} method. Being a prototype
+  *      means that other objects are initialized by copying this object's state.
+  *      If {@code null}, a new, default statistics object is used. Any statistic
+  *      values in the prototype are propagated to contributing statistics
+  *      objects, but not into these aggregate statistics.
+  * @param initialStatistics a {@code SummaryStatistics} to serve as the
+  *      internal aggregate statistics object. The object itself is stored,
+  *      not a copy. If {@code null}, a new, default statistics object is used.
+  * @see #createContributingStatistics()
+  */
+ public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
+                                   SummaryStatistics initialStatistics) {
+     if (prototypeStatistics != null) {
+         this.statisticsPrototype = prototypeStatistics;
+     } else {
+         this.statisticsPrototype = new SummaryStatistics();
+     }
+     if (initialStatistics != null) {
+         this.statistics = initialStatistics;
+     } else {
+         this.statistics = new SummaryStatistics();
+     }
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the maximum over all the aggregated
+  * data, read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getMax()
+  */
+ public double getMax() {
+     final double max;
+     synchronized (statistics) {
+         max = statistics.getMax();
+     }
+     return max;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the mean of all the aggregated data,
+  * read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getMean()
+  */
+ public double getMean() {
+     final double mean;
+     synchronized (statistics) {
+         mean = statistics.getMean();
+     }
+     return mean;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the minimum over all the aggregated
+  * data, read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getMin()
+  */
+ public double getMin() {
+     final double min;
+     synchronized (statistics) {
+         min = statistics.getMin();
+     }
+     return min;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the count of all the aggregated data,
+  * read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getN()
+  */
+ public long getN() {
+     final long count;
+     synchronized (statistics) {
+         count = statistics.getN();
+     }
+     return count;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the standard deviation of all the
+  * aggregated data, read while holding the lock on the internal aggregate
+  * statistics.
+  *
+  * @see StatisticalSummary#getStandardDeviation()
+  */
+ public double getStandardDeviation() {
+     final double standardDeviation;
+     synchronized (statistics) {
+         standardDeviation = statistics.getStandardDeviation();
+     }
+     return standardDeviation;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the sum of all the aggregated data,
+  * read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getSum()
+  */
+ public double getSum() {
+     final double sum;
+     synchronized (statistics) {
+         sum = statistics.getSum();
+     }
+     return sum;
+ }
+
+ /**
+  * {@inheritDoc}. Here the value is the variance of all the aggregated
+  * data, read while holding the lock on the internal aggregate statistics.
+  *
+  * @see StatisticalSummary#getVariance()
+  */
+ public double getVariance() {
+     final double variance;
+     synchronized (statistics) {
+         variance = statistics.getVariance();
+     }
+     return variance;
+ }
+
+ /**
+  * Returns the sum of the logs of all the aggregated data, read while
+  * holding the lock on the internal aggregate statistics.
+  *
+  * @return the sum of logs
+  * @see SummaryStatistics#getSumOfLogs()
+  */
+ public double getSumOfLogs() {
+     final double sumOfLogs;
+     synchronized (statistics) {
+         sumOfLogs = statistics.getSumOfLogs();
+     }
+     return sumOfLogs;
+ }
+
+ /**
+  * Returns the geometric mean of all the aggregated data, read while
+  * holding the lock on the internal aggregate statistics.
+  *
+  * @return the geometric mean
+  * @see SummaryStatistics#getGeometricMean()
+  */
+ public double getGeometricMean() {
+     final double geometricMean;
+     synchronized (statistics) {
+         geometricMean = statistics.getGeometricMean();
+     }
+     return geometricMean;
+ }
+
+ /**
+  * Returns the sum of the squares of all the aggregated data, read while
+  * holding the lock on the internal aggregate statistics.
+  *
+  * @return The sum of squares
+  * @see SummaryStatistics#getSumsq()
+  */
+ public double getSumsq() {
+     final double sumOfSquares;
+     synchronized (statistics) {
+         sumOfSquares = statistics.getSumsq();
+     }
+     return sumOfSquares;
+ }
+
+ /**
+  * Returns a statistic related to the Second Central Moment. Specifically,
+  * what is returned is the sum of squared deviations from the sample mean
+  * among all of the aggregated data. The value is read while holding the
+  * lock on the internal aggregate statistics.
+  *
+  * @return second central moment statistic
+  * @see SummaryStatistics#getSecondMoment()
+  */
+ public double getSecondMoment() {
+     final double secondMoment;
+     synchronized (statistics) {
+         secondMoment = statistics.getSecondMoment();
+     }
+     return secondMoment;
+ }
+
+ /**
+  * Return a {@link StatisticalSummaryValues} instance reporting current
+  * aggregate statistics.
+  * <p>
+  * The lock on the internal aggregate statistics is held while the mean,
+  * variance, count, maximum, minimum and sum are read, so the six values
+  * are taken under a single acquisition of that lock.</p>
+  *
+  * @return Current values of aggregate statistics
+  */
+ public StatisticalSummary getSummary() {
+     synchronized (statistics) {
+         final double mean = getMean();
+         final double variance = getVariance();
+         final long count = getN();
+         final double max = getMax();
+         final double min = getMin();
+         final double sum = getSum();
+         return new StatisticalSummaryValues(mean, variance, count, max, min, sum);
+     }
+ }
+
+ /**
+  * Creates and returns a {@code SummaryStatistics} whose data will be
+  * aggregated with those of this {@code AggregateSummaryStatistics}.
+  * <p>
+  * The returned object is an {@code AggregatingSummaryStatistics} bound to
+  * the internal aggregate statistics, into which the state of the
+  * configured prototype has been copied with
+  * {@code SummaryStatistics.copy}.</p>
+  *
+  * @return a {@code SummaryStatistics} whose data will be aggregated with
+  *      those of this {@code AggregateSummaryStatistics}.  The initial state
+  *      is a copy of the configured prototype statistics.
+  */
+ public SummaryStatistics createContributingStatistics() {
+     final SummaryStatistics contributor = new AggregatingSummaryStatistics(statistics);
+     SummaryStatistics.copy(statisticsPrototype, contributor);
+     return contributor;
+ }
+
+ /**
+ * Computes aggregate summary statistics. This method can be used to combine statistics
+ * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
+ * should contain the same values that would have been obtained by computing a single
+ * StatisticalSummary over the combined dataset.
+ * <p>
+ * Returns null if the collection is empty or null.
+ * </p>
+ *
+ * @param statistics collection of SummaryStatistics to aggregate
+ * @return summary statistics for the combined dataset
+ */
+ public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
+ // A null or empty collection has nothing to combine: return null.
+ if (statistics == null) {
+ return null;
+ }
+ Iterator<SummaryStatistics> iterator = statistics.iterator();
+ if (!iterator.hasNext()) {
+ return null;
+ }
+ // Seed the running totals with the first element.
+ SummaryStatistics current = iterator.next();
+ long n = current.getN();
+ double min = current.getMin();
+ double sum = current.getSum();
+ double max = current.getMax();
+ double m2 = current.getSecondMoment();
+ double mean = current.getMean();
+ // Fold each remaining element into the running totals.
+ // NOTE(review): an element with no data presumably reports NaN for
+ // getMean()/getSecondMoment(), which would propagate into m2 and the
+ // returned variance -- confirm against SummaryStatistics.
+ while (iterator.hasNext()) {
+ current = iterator.next();
+ // The isNaN tests let a NaN running min/max be replaced by the
+ // current element's value.
+ if (current.getMin() < min || Double.isNaN(min)) {
+ min = current.getMin();
+ }
+ if (current.getMax() > max || Double.isNaN(max)) {
+ max = current.getMax();
+ }
+ sum += current.getSum();
+ final double oldN = n;
+ final double curN = current.getN();
+ n += curN;
+ // meanDiff uses the running mean from BEFORE this element is merged.
+ final double meanDiff = current.getMean() - mean;
+ mean = sum / n;
+ // Combined sum of squared deviations: both partial sums plus a
+ // correction term for the difference between the two means.
+ m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
+ }
+ // Variance is m2 / (n - 1), with NaN for no data and 0 for a single value.
+ final double variance;
+ if (n == 0) {
+ variance = Double.NaN;
+ } else if (n == 1) {
+ variance = 0d;
+ } else {
+ variance = m2 / (n - 1);
+ }
+ return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
+ }
+
+ /**
+  * A {@code SummaryStatistics} that mirrors every value it receives into a
+  * second {@code SummaryStatistics} used for aggregation.
+  *
+  * @since 2.0
+  */
+ private static class AggregatingSummaryStatistics extends SummaryStatistics {
+
+     /** Serialization version of this class. */
+     private static final long serialVersionUID = 1L;
+
+     /**
+      * Statistics that also receive every value added to this instance
+      * (and possibly values from other contributors).
+      */
+     private final SummaryStatistics aggregateStatistics;
+
+     /**
+      * Creates a contributor bound to the given aggregate.
+      *
+      * @param aggregateStatistics a {@code SummaryStatistics} into which
+      * values added to this statistics object should be aggregated
+      */
+     public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
+         this.aggregateStatistics = aggregateStatistics;
+     }
+
+     /**
+      * {@inheritDoc}. The value is first added to these statistics and then,
+      * while holding the aggregate's monitor, to the configured aggregate.
+      *
+      * @see SummaryStatistics#addValue(double)
+      */
+     @Override
+     public void addValue(double value) {
+         // Local update first, without taking the aggregate's lock.
+         super.addValue(value);
+         // Forward to the aggregate under its monitor.
+         synchronized (aggregateStatistics) {
+             aggregateStatistics.addValue(value);
+         }
+     }
+
+     /**
+      * Returns true iff <code>object</code> is an
+      * <code>AggregatingSummaryStatistics</code> whose own statistics and
+      * whose aggregate statistics are both equal to those of this instance.
+      * @param object the object to test equality against.
+      * @return true if object equals this
+      */
+     @Override
+     public boolean equals(Object object) {
+         if (this == object) {
+             return true;
+         }
+         if (!(object instanceof AggregatingSummaryStatistics)) {
+             return false;
+         }
+         final AggregatingSummaryStatistics other = (AggregatingSummaryStatistics) object;
+         if (!super.equals(other)) {
+             return false;
+         }
+         return aggregateStatistics.equals(other.aggregateStatistics);
+     }
+
+     /**
+      * Returns a hash code combining the hash of these statistics with the
+      * hash of the aggregate statistics.
+      * @return hash code
+      */
+     @Override
+     public int hashCode() {
+         final int ownHash = super.hashCode();
+         final int aggregateHash = aggregateStatistics.hashCode();
+         return 123 + ownHash + aggregateHash;
+     }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/DescriptiveStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/DescriptiveStatistics.java
new file mode 100644
index 0000000..e5a18dc
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/DescriptiveStatistics.java
@@ -0,0 +1,721 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
+import java.util.Arrays;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
+import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.Skewness;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+import org.apache.commons.math.stat.descriptive.rank.Max;
+import org.apache.commons.math.stat.descriptive.rank.Min;
+import org.apache.commons.math.stat.descriptive.rank.Percentile;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+import org.apache.commons.math.util.ResizableDoubleArray;
+import org.apache.commons.math.util.FastMath;
+
+
+/**
+ * Maintains a dataset of values of a single variable and computes descriptive
+ * statistics based on stored data. The {@link #getWindowSize() windowSize}
+ * property sets a limit on the number of values that can be stored in the
+ * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
+ * the dataset. This value should be used with caution, as the backing store
+ * will grow without bound in this case. For very large datasets,
+ * {@link SummaryStatistics}, which does not store the dataset, should be used
+ * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
+ * more values are added than can be stored in the dataset, new values are
+ * added in a "rolling" manner, with new values replacing the "oldest" values
+ * in the dataset.
+ *
+ * <p>Note: this class is not threadsafe. Use
+ * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
+ * threads is required.</p>
+ *
+ * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
+ */
+public class DescriptiveStatistics implements StatisticalSummary, Serializable {
+
+ /**
+ * Represents an infinite window size. When the {@link #getWindowSize()}
+ * returns this value, there is no limit to the number of data values
+ * that can be stored in the dataset.
+ */
+ public static final int INFINITE_WINDOW = -1;
+
+ /** Serialization UID */
+ private static final long serialVersionUID = 4133067267405273064L;
+
+ /** Name of the setQuantile method, looked up reflectively on custom percentile implementations. */
+ private static final String SET_QUANTILE_METHOD_NAME = "setQuantile";
+
+ /** Maximum number of values kept in the dataset, or {@link #INFINITE_WINDOW} for no limit. */
+ protected int windowSize = INFINITE_WINDOW;
+
+ /**
+ * Stored data values. Every statistic is evaluated over the contents of
+ * this array.
+ */
+ protected ResizableDoubleArray eDA = new ResizableDoubleArray();
+
+ /** Mean statistic implementation - can be reset by setter. */
+ private UnivariateStatistic meanImpl = new Mean();
+
+ /** Geometric mean statistic implementation - can be reset by setter. */
+ private UnivariateStatistic geometricMeanImpl = new GeometricMean();
+
+ /** Kurtosis statistic implementation - can be reset by setter. */
+ private UnivariateStatistic kurtosisImpl = new Kurtosis();
+
+ /** Maximum statistic implementation - can be reset by setter. */
+ private UnivariateStatistic maxImpl = new Max();
+
+ /** Minimum statistic implementation - can be reset by setter. */
+ private UnivariateStatistic minImpl = new Min();
+
+ /** Percentile statistic implementation - can be reset by setter. */
+ private UnivariateStatistic percentileImpl = new Percentile();
+
+ /** Skewness statistic implementation - can be reset by setter. */
+ private UnivariateStatistic skewnessImpl = new Skewness();
+
+ /** Variance statistic implementation - can be reset by setter. */
+ private UnivariateStatistic varianceImpl = new Variance();
+
+ /** Sum of squares statistic implementation - can be reset by setter. */
+ private UnivariateStatistic sumsqImpl = new SumOfSquares();
+
+ /** Sum statistic implementation - can be reset by setter. */
+ private UnivariateStatistic sumImpl = new Sum();
+
+ /**
+ * Construct a DescriptiveStatistics instance with an infinite window
+ */
+ public DescriptiveStatistics() {
+ // Nothing to do: the window size, the backing array and all statistic
+ // implementations are set by their field initializers.
+ }
+
+ /**
+ * Construct a DescriptiveStatistics instance with the specified window
+ *
+ * @param window the window size.
+ */
+ public DescriptiveStatistics(int window) {
+ // setWindowSize rejects values below 1 other than INFINITE_WINDOW.
+ // NOTE(review): setWindowSize is public and non-final, so a subclass
+ // override would run here before the subclass constructor body.
+ setWindowSize(window);
+ }
+
+ /**
+ * Construct a DescriptiveStatistics instance with an infinite window
+ * and the initial data values in double[] initialDoubleArray.
+ * If initialDoubleArray is null, then this constructor corresponds to
+ * DescriptiveStatistics()
+ *
+ * @param initialDoubleArray the initial double[].
+ */
+ public DescriptiveStatistics(double[] initialDoubleArray) {
+     // A null array keeps the empty store created by the field initializer.
+     if (initialDoubleArray == null) {
+         return;
+     }
+     eDA = new ResizableDoubleArray(initialDoubleArray);
+ }
+
+ /**
+ * Copy constructor. Construct a new DescriptiveStatistics instance that
+ * is a copy of original.
+ *
+ * @param original DescriptiveStatistics instance to copy
+ */
+ public DescriptiveStatistics(DescriptiveStatistics original) {
+ // Delegates to the static copy(source, dest) helper, which overwrites the
+ // data, window size and statistic implementations of this new instance.
+ copy(original, this);
+ }
+
+ /**
+ * Adds the value to the dataset. If the dataset is at the maximum size
+ * (i.e., the number of stored elements equals the currently configured
+ * windowSize), the first (oldest) element in the dataset is discarded
+ * to make room for the new value.
+ *
+ * @param v the value to be added
+ */
+ public void addValue(double v) {
+     if (windowSize == INFINITE_WINDOW) {
+         // Unbounded dataset: always append.
+         eDA.addElement(v);
+         return;
+     }
+     if (getN() == windowSize) {
+         // Window is full: roll the new value in.
+         eDA.addElementRolling(v);
+     } else if (getN() < windowSize) {
+         eDA.addElement(v);
+     }
+     // If more values are stored than the window allows, nothing is added.
+ }
+
+ /**
+ * Removes the most recent value from the dataset.
+ */
+ public void removeMostRecentValue() {
+ // NOTE(review): behaviour on an empty dataset is decided by
+ // ResizableDoubleArray.discardMostRecentElements -- confirm there.
+ eDA.discardMostRecentElements(1);
+ }
+
+ /**
+ * Replaces the most recently stored value with the given value.
+ * There must be at least one element stored to call this method.
+ *
+ * @param v the value to replace the most recent stored value
+ * @return replaced value
+ */
+ public double replaceMostRecentValue(double v) {
+     // The backing array swaps in the new value and hands back the old one.
+     final double replaced = eDA.substituteMostRecentElement(v);
+     return replaced;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
+ * arithmetic mean </a> of the available values
+ * @return The mean or Double.NaN if no values have been added.
+ */
+ public double getMean() {
+     // Evaluated on demand by the configured mean implementation.
+     final double mean = apply(meanImpl);
+     return mean;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
+ * geometric mean </a> of the available values
+ * @return The geometricMean, Double.NaN if no values have been added,
+ * or if the product of the available values is less than or equal to 0.
+ */
+ public double getGeometricMean() {
+     // Evaluated on demand by the configured geometric mean implementation.
+     final double geometricMean = apply(geometricMeanImpl);
+     return geometricMean;
+ }
+
+ /**
+ * Returns the variance of the available values.
+ * @return The variance, Double.NaN if no values have been added
+ * or 0.0 for a single value set.
+ */
+ public double getVariance() {
+     // Evaluated on demand by the configured variance implementation.
+     final double variance = apply(varianceImpl);
+     return variance;
+ }
+
+ /**
+ * Returns the standard deviation of the available values.
+ * @return The standard deviation, Double.NaN if no values have been added
+ * or 0.0 for a single value set.
+ */
+ public double getStandardDeviation() {
+ double stdDev = Double.NaN;
+ if (getN() > 0) {
+ if (getN() > 1) {
+ stdDev = FastMath.sqrt(getVariance());
+ } else {
+ stdDev = 0.0;
+ }
+ }
+ return stdDev;
+ }
+
+ /**
+ * Returns the skewness of the available values. Skewness is a
+ * measure of the asymmetry of a given distribution.
+ * @return The skewness, Double.NaN if no values have been added
+ * or 0.0 for a value set &lt;=2.
+ */
+ public double getSkewness() {
+     // Evaluated on demand by the configured skewness implementation.
+     final double skewness = apply(skewnessImpl);
+     return skewness;
+ }
+
+ /**
+ * Returns the Kurtosis of the available values. Kurtosis is a
+ * measure of the "peakedness" of a distribution
+ * @return The kurtosis, Double.NaN if no values have been added, or 0.0
+ * for a value set &lt;=3.
+ */
+ public double getKurtosis() {
+     // Evaluated on demand by the configured kurtosis implementation.
+     final double kurtosis = apply(kurtosisImpl);
+     return kurtosis;
+ }
+
+ /**
+ * Returns the maximum of the available values
+ * @return The max or Double.NaN if no values have been added.
+ */
+ public double getMax() {
+     // Evaluated on demand by the configured maximum implementation.
+     final double max = apply(maxImpl);
+     return max;
+ }
+
+ /**
+ * Returns the minimum of the available values
+ * @return The min or Double.NaN if no values have been added.
+ */
+ public double getMin() {
+     // Evaluated on demand by the configured minimum implementation.
+     final double min = apply(minImpl);
+     return min;
+ }
+
+ /**
+ * Returns the number of available values
+ * @return The number of available values
+ */
+ public long getN() {
+     // Number of elements currently held by the backing array.
+     final long count = eDA.getNumElements();
+     return count;
+ }
+
+ /**
+ * Returns the sum of the available values.
+ * @return The sum or Double.NaN if no values have been added
+ */
+ public double getSum() {
+     // Evaluated on demand by the configured sum implementation.
+     final double sum = apply(sumImpl);
+     return sum;
+ }
+
+ /**
+ * Returns the sum of the squares of the available values.
+ * @return The sum of the squares or Double.NaN if no
+ * values have been added.
+ */
+ public double getSumsq() {
+     // Evaluated on demand by the configured sum-of-squares implementation.
+     final double sumOfSquares = apply(sumsqImpl);
+     return sumOfSquares;
+ }
+
+ /**
+ * Resets all statistics and storage
+ */
+ public void clear() {
+ // Only the stored values are cleared; the window size and the configured
+ // statistic implementations are left as they are.
+ eDA.clear();
+ }
+
+
+ /**
+ * Returns the maximum number of values that can be stored in the
+ * dataset, or INFINITE_WINDOW (-1) if there is no limit.
+ *
+ * @return The current window size, or -1 if it is infinite.
+ */
+ public int getWindowSize() {
+     // INFINITE_WINDOW is returned unchanged when no limit is configured.
+     final int size = windowSize;
+     return size;
+ }
+
+ /**
+ * WindowSize controls the number of values which contribute
+ * to the reported statistics. For example, if
+ * windowSize is set to 3 and the values {1,2,3,4,5}
+ * have been added <strong> in that order</strong>
+ * then the <i>available values</i> are {3,4,5} and all
+ * reported statistics will be based on these values
+ * @param windowSize sets the size of the window.
+ */
+ public void setWindowSize(int windowSize) {
+     // Only positive sizes and the INFINITE_WINDOW sentinel are accepted.
+     final boolean unbounded = windowSize == INFINITE_WINDOW;
+     if (windowSize < 1 && !unbounded) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
+     }
+
+     this.windowSize = windowSize;
+
+     // When the new window is smaller than the number of stored values,
+     // discard the surplus from the front of the array.
+     if (!unbounded) {
+         final int stored = eDA.getNumElements();
+         if (windowSize < stored) {
+             eDA.discardFrontElements(stored - windowSize);
+         }
+     }
+ }
+
+ /**
+ * Returns the current set of values in an array of double primitives.
+ * The order of addition is preserved. The returned array is a fresh
+ * copy of the underlying data -- i.e., it is not a reference to the
+ * stored data.
+ *
+ * @return returns the current set of numbers in the order in which they
+ * were added to this set
+ */
+ public double[] getValues() {
+     // Whatever array the backing store returns is handed to the caller.
+     final double[] values = eDA.getElements();
+     return values;
+ }
+
+ /**
+ * Returns the current set of values in an array of double primitives,
+ * sorted in ascending order. The returned array is a fresh
+ * copy of the underlying data -- i.e., it is not a reference to the
+ * stored data.
+ * @return returns the current set of
+ * numbers sorted in ascending order
+ */
+ public double[] getSortedValues() {
+     // Sort the array obtained from getValues() in place and return it.
+     final double[] ascending = getValues();
+     Arrays.sort(ascending);
+     return ascending;
+ }
+
+ /**
+ * Returns the element at the specified index
+ * @param index The Index of the element
+ * @return return the element at the specified index
+ */
+ public double getElement(int index) {
+     // Index handling is left entirely to the backing array.
+     final double element = eDA.getElement(index);
+     return element;
+ }
+
+ /**
+ * Returns an estimate for the pth percentile of the stored values.
+ * <p>
+ * The implementation provided here follows the first estimation procedure presented
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
+ * </p><p>
+ * <strong>Preconditions</strong>:<ul>
+ * <li><code>0 &lt; p &le; 100</code> (otherwise an
+ * <code>IllegalArgumentException</code> is thrown)</li>
+ * <li>at least one value must be stored (returns <code>Double.NaN
+ * </code> otherwise)</li>
+ * </ul></p>
+ *
+ * @param p the requested percentile (scaled from 0 - 100)
+ * @return An estimate for the pth percentile of the stored data
+ * @throws IllegalArgumentException if the percentile implementation has been
+ * overridden and the supplied implementation does not provide an accessible
+ * <code>setQuantile(double)</code> method, or if that method itself fails
+ */
+ public double getPercentile(double p) {
+     if (percentileImpl instanceof Percentile) {
+         // Default implementation type: set the quantile directly.
+         ((Percentile) percentileImpl).setQuantile(p);
+     } else {
+         // Custom implementation: it is only known to expose a
+         // setQuantile(double) method, so the call is made reflectively.
+         try {
+             percentileImpl.getClass()
+                 .getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
+                 .invoke(percentileImpl, Double.valueOf(p));
+         } catch (NoSuchMethodException e1) { // Setter guard should prevent
+             throw MathRuntimeException.createIllegalArgumentException(
+                 LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
+                 percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
+         } catch (IllegalAccessException e2) {
+             throw MathRuntimeException.createIllegalArgumentException(
+                 LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
+                 SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
+         } catch (InvocationTargetException e3) {
+             // Report the failure raised inside setQuantile itself.
+             throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
+         }
+     }
+     return apply(percentileImpl);
+ }
+
+ /**
+ * Generates a text report displaying univariate statistics from values
+ * that have been added. Each statistic is displayed on a separate
+ * line.
+ *
+ * @return String with line feeds displaying statistics
+ */
+ @Override
+ public String toString() {
+     // One "label: value" line per statistic, each terminated by a line feed.
+     final String newline = "\n";
+     final StringBuilder report = new StringBuilder("DescriptiveStatistics:");
+     report.append(newline)
+           .append("n: ").append(getN()).append(newline)
+           .append("min: ").append(getMin()).append(newline)
+           .append("max: ").append(getMax()).append(newline)
+           .append("mean: ").append(getMean()).append(newline)
+           .append("std dev: ").append(getStandardDeviation()).append(newline)
+           .append("median: ").append(getPercentile(50)).append(newline)
+           .append("skewness: ").append(getSkewness()).append(newline)
+           .append("kurtosis: ").append(getKurtosis()).append(newline);
+     return report.toString();
+ }
+
+ /**
+ * Apply the given statistic to the data associated with this set of statistics.
+ * @param stat the statistic to apply
+ * @return the computed value of the statistic.
+ */
+ public double apply(UnivariateStatistic stat) {
+     // Hand the statistic the backing array together with the start index
+     // and element count that delimit the stored values.
+     final double[] backing = eDA.getInternalValues();
+     final int offset = eDA.start();
+     final int length = eDA.getNumElements();
+     return stat.evaluate(backing, offset, length);
+ }
+
+ // Implementation getters and setter
+
+ /**
+ * Returns the currently configured mean implementation.
+ *
+ * @return the UnivariateStatistic implementing the mean
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getMeanImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = meanImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the mean.</p>
+ *
+ * @param meanImpl the UnivariateStatistic instance to use
+ * for computing the mean
+ * @since 1.2
+ */
+ public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
+ // Stored as-is with no null check; getMean() uses it on its next call.
+ this.meanImpl = meanImpl;
+ }
+
+ /**
+ * Returns the currently configured geometric mean implementation.
+ *
+ * @return the UnivariateStatistic implementing the geometric mean
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getGeometricMeanImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = geometricMeanImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the geometric mean.</p>
+ *
+ * @param geometricMeanImpl the UnivariateStatistic instance to use
+ * for computing the geometric mean
+ * @since 1.2
+ */
+ public synchronized void setGeometricMeanImpl(
+ UnivariateStatistic geometricMeanImpl) {
+ // Stored as-is with no null check; getGeometricMean() uses it on its next call.
+ this.geometricMeanImpl = geometricMeanImpl;
+ }
+
+ /**
+ * Returns the currently configured kurtosis implementation.
+ *
+ * @return the UnivariateStatistic implementing the kurtosis
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getKurtosisImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = kurtosisImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the kurtosis.</p>
+ *
+ * @param kurtosisImpl the UnivariateStatistic instance to use
+ * for computing the kurtosis
+ * @since 1.2
+ */
+ public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
+ // Stored as-is with no null check; getKurtosis() uses it on its next call.
+ this.kurtosisImpl = kurtosisImpl;
+ }
+
+ /**
+ * Returns the currently configured maximum implementation.
+ *
+ * @return the UnivariateStatistic implementing the maximum
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getMaxImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = maxImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the maximum.</p>
+ *
+ * @param maxImpl the UnivariateStatistic instance to use
+ * for computing the maximum
+ * @since 1.2
+ */
+ public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
+ // Stored as-is with no null check; getMax() uses it on its next call.
+ this.maxImpl = maxImpl;
+ }
+
+ /**
+ * Returns the currently configured minimum implementation.
+ *
+ * @return the UnivariateStatistic implementing the minimum
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getMinImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = minImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the minimum.</p>
+ *
+ * @param minImpl the UnivariateStatistic instance to use
+ * for computing the minimum
+ * @since 1.2
+ */
+ public synchronized void setMinImpl(UnivariateStatistic minImpl) {
+ // Stored as-is with no null check; getMin() uses it on its next call.
+ this.minImpl = minImpl;
+ }
+
+ /**
+ * Returns the currently configured percentile implementation.
+ *
+ * @return the UnivariateStatistic implementing the percentile
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getPercentileImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = percentileImpl;
+     return current;
+ }
+
+ /**
+ * Sets the implementation to be used by {@link #getPercentile(double)}.
+ * The supplied <code>UnivariateStatistic</code> must provide a
+ * <code>setQuantile(double)</code> method; otherwise
+ * <code>IllegalArgumentException</code> is thrown.
+ *
+ * @param percentileImpl the percentileImpl to set
+ * @throws IllegalArgumentException if the supplied implementation does not
+ * provide a <code>setQuantile</code> method
+ * @since 1.2
+ */
+ public synchronized void setPercentileImpl(
+         UnivariateStatistic percentileImpl) {
+     // Probe the candidate by actually invoking setQuantile(50.0) on it;
+     // it is installed only when that reflective call succeeds.
+     final Class<?> implClass = percentileImpl.getClass();
+     try {
+         implClass.getMethod(SET_QUANTILE_METHOD_NAME, Double.TYPE)
+                  .invoke(percentileImpl, Double.valueOf(50.0d));
+     } catch (NoSuchMethodException e1) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD,
+             implClass.getName(), SET_QUANTILE_METHOD_NAME);
+     } catch (IllegalAccessException e2) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD,
+             SET_QUANTILE_METHOD_NAME, implClass.getName());
+     } catch (InvocationTargetException e3) {
+         throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
+     }
+     this.percentileImpl = percentileImpl;
+ }
+
+ /**
+ * Returns the currently configured skewness implementation.
+ *
+ * @return the UnivariateStatistic implementing the skewness
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getSkewnessImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = skewnessImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the skewness.</p>
+ *
+ * @param skewnessImpl the UnivariateStatistic instance to use
+ * for computing the skewness
+ * @since 1.2
+ */
+ public synchronized void setSkewnessImpl(
+ UnivariateStatistic skewnessImpl) {
+ // Stored as-is with no null check; getSkewness() uses it on its next call.
+ this.skewnessImpl = skewnessImpl;
+ }
+
+ /**
+ * Returns the currently configured variance implementation.
+ *
+ * @return the UnivariateStatistic implementing the variance
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getVarianceImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = varianceImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the variance.</p>
+ *
+ * @param varianceImpl the UnivariateStatistic instance to use
+ * for computing the variance
+ * @since 1.2
+ */
+ public synchronized void setVarianceImpl(
+ UnivariateStatistic varianceImpl) {
+ // Stored as-is with no null check; getVariance() (and therefore
+ // getStandardDeviation()) uses it on its next call.
+ this.varianceImpl = varianceImpl;
+ }
+
+ /**
+ * Returns the currently configured sum of squares implementation.
+ *
+ * @return the UnivariateStatistic implementing the sum of squares
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getSumsqImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = sumsqImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the sum of squares.</p>
+ *
+ * @param sumsqImpl the UnivariateStatistic instance to use
+ * for computing the sum of squares
+ * @since 1.2
+ */
+ public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
+ // Stored as-is with no null check; getSumsq() uses it on its next call.
+ this.sumsqImpl = sumsqImpl;
+ }
+
+ /**
+ * Returns the currently configured sum implementation.
+ *
+ * @return the UnivariateStatistic implementing the sum
+ * @since 1.2
+ */
+ public synchronized UnivariateStatistic getSumImpl() {
+     // Read under this instance's monitor, like the matching setter.
+     final UnivariateStatistic current = sumImpl;
+     return current;
+ }
+
+ /**
+ * <p>Sets the implementation for the sum.</p>
+ *
+ * @param sumImpl the UnivariateStatistic instance to use
+ * for computing the sum
+ * @since 1.2
+ */
+ public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
+ // Stored as-is with no null check; getSum() uses it on its next call.
+ this.sumImpl = sumImpl;
+ }
+
+ /**
+ * Returns a copy of this DescriptiveStatistics instance with the same internal state.
+ *
+ * @return a copy of this
+ */
+ public DescriptiveStatistics copy() {
+     // Start from a default instance and let the static helper overwrite it.
+     final DescriptiveStatistics duplicate = new DescriptiveStatistics();
+     copy(this, duplicate);
+     return duplicate;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source DescriptiveStatistics to copy
+ * @param dest DescriptiveStatistics to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
+     // Copy data and window size
+     dest.eDA = source.eDA.copy();
+     dest.windowSize = source.windowSize;
+
+     // Copy implementations. Every statistic gets its own instance so that
+     // dest never shares a mutable statistic object with source -- for
+     // example the percentile implementation, whose quantile is set by
+     // getPercentile() before each evaluation.
+     dest.maxImpl = source.maxImpl.copy();
+     dest.meanImpl = source.meanImpl.copy();
+     dest.minImpl = source.minImpl.copy();
+     dest.sumImpl = source.sumImpl.copy();
+     dest.varianceImpl = source.varianceImpl.copy();
+     dest.sumsqImpl = source.sumsqImpl.copy();
+     dest.geometricMeanImpl = source.geometricMeanImpl.copy();
+     dest.kurtosisImpl = source.kurtosisImpl.copy();
+     dest.skewnessImpl = source.skewnessImpl.copy();
+     dest.percentileImpl = source.percentileImpl.copy();
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/MultivariateSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/MultivariateSummaryStatistics.java
new file mode 100644
index 0000000..8062f5b
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/MultivariateSummaryStatistics.java
@@ -0,0 +1,637 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import org.apache.commons.math.DimensionMismatchException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.VectorialCovariance;
+import org.apache.commons.math.stat.descriptive.rank.Max;
+import org.apache.commons.math.stat.descriptive.rank.Min;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+import org.apache.commons.math.util.MathUtils;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * <p>Computes summary statistics for a stream of n-tuples added using the
+ * {@link #addValue(double[]) addValue} method. The data values are not stored
+ * in memory, so this class can be used to compute statistics for very large
+ * n-tuple streams.</p>
+ *
+ * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
+ * summary state and compute statistics are configurable via setters.
+ * For example, the default implementation for the mean can be overridden by
+ * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
+ * parameters to these methods must implement the
+ * {@link StorelessUnivariateStatistic} interface and configuration must be
+ * completed before <code>addValue</code> is called. No configuration is
+ * necessary to use the default, commons-math provided implementations.</p>
+ *
+ * <p>To compute statistics for a stream of n-tuples, construct a
+ * MultivariateSummaryStatistics instance with dimension n and then use
+ * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
+ * methods where Xxx is a statistic return an array of <code>double</code>
+ * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
+ * value of the given statistic for data range consisting of the i<sup>th</sup> element of
+ * each of the input n-tuples. For example, if <code>addValue</code> is called
+ * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
+ * <code>getSum</code> will return a three-element array with values
+ * {0+3+6, 1+4+7, 2+5+8}</p>
+ *
+ * <p>Note: This class is not thread-safe. Use
+ * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
+ * threads is required.</p>
+ *
+ * @since 1.2
+ * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 déc. 2010) $
+ */
+public class MultivariateSummaryStatistics
+ implements StatisticalMultivariateSummary, Serializable {
+
+ /** Serialization UID */
+ private static final long serialVersionUID = 2271900808994826718L;
+
+ /** Dimension of the data. */
+ private int k;
+
+ /** Count of values that have been added */
+ private long n = 0;
+
+ /** Sum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] sumImpl;
+
+ /** Sum of squares statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] sumSqImpl;
+
+ /** Minimum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] minImpl;
+
+ /** Maximum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] maxImpl;
+
+ /** Sum of log statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] sumLogImpl;
+
+ /** Geometric mean statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] geoMeanImpl;
+
+ /** Mean statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic[] meanImpl;
+
+ /** Covariance statistic implementation - cannot be reset. */
+ private VectorialCovariance covarianceImpl;
+
+ /**
+ * Construct a MultivariateSummaryStatistics instance
+ * @param k dimension of the data
+ * @param isCovarianceBiasCorrected if true, the unbiased sample
+ * covariance is computed, otherwise the biased population covariance
+ * is computed
+ */
+ public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
+ this.k = k;
+
+ sumImpl = new StorelessUnivariateStatistic[k];
+ sumSqImpl = new StorelessUnivariateStatistic[k];
+ minImpl = new StorelessUnivariateStatistic[k];
+ maxImpl = new StorelessUnivariateStatistic[k];
+ sumLogImpl = new StorelessUnivariateStatistic[k];
+ geoMeanImpl = new StorelessUnivariateStatistic[k];
+ meanImpl = new StorelessUnivariateStatistic[k];
+
+ for (int i = 0; i < k; ++i) {
+ sumImpl[i] = new Sum();
+ sumSqImpl[i] = new SumOfSquares();
+ minImpl[i] = new Min();
+ maxImpl[i] = new Max();
+ sumLogImpl[i] = new SumOfLogs();
+ geoMeanImpl[i] = new GeometricMean();
+ meanImpl[i] = new Mean();
+ }
+
+ covarianceImpl =
+ new VectorialCovariance(k, isCovarianceBiasCorrected);
+
+ }
+
+ /**
+ * Add an n-tuple to the data
+ *
+ * @param value the n-tuple to add
+ * @throws DimensionMismatchException if the length of the array
+ * does not match the one used at construction
+ */
+ public void addValue(double[] value)
+ throws DimensionMismatchException {
+ checkDimension(value.length);
+ for (int i = 0; i < k; ++i) {
+ double v = value[i];
+ sumImpl[i].increment(v);
+ sumSqImpl[i].increment(v);
+ minImpl[i].increment(v);
+ maxImpl[i].increment(v);
+ sumLogImpl[i].increment(v);
+ geoMeanImpl[i].increment(v);
+ meanImpl[i].increment(v);
+ }
+ covarianceImpl.increment(value);
+ n++;
+ }
+
+ /**
+ * Returns the dimension of the data
+ * @return The dimension of the data
+ */
+ public int getDimension() {
+ return k;
+ }
+
+ /**
+ * Returns the number of available values
+ * @return The number of available values
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * Returns an array of the results of a statistic.
+ * @param stats univariate statistic array
+ * @return results array
+ */
+ private double[] getResults(StorelessUnivariateStatistic[] stats) {
+ double[] results = new double[stats.length];
+ for (int i = 0; i < results.length; ++i) {
+ results[i] = stats[i].getResult();
+ }
+ return results;
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the sum of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component sums
+ */
+ public double[] getSum() {
+ return getResults(sumImpl);
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the sum of squares of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component sums of squares
+ */
+ public double[] getSumSq() {
+ return getResults(sumSqImpl);
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the sum of logs of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component log sums
+ */
+ public double[] getSumLog() {
+ return getResults(sumLogImpl);
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the mean of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component means
+ */
+ public double[] getMean() {
+ return getResults(meanImpl);
+ }
+
+ /**
+ * Computes, for each component, the standard deviation of the values added
+ * through {@link #addValue(double[])}.
+ * <p>Every entry is <code>Double.NaN</code> when no values have been added and
+ * <code>0.0</code> when exactly one value has been added; otherwise the
+ * i<sup>th</sup> entry is the square root of the i<sup>th</sup> diagonal entry
+ * of the covariance matrix.</p>
+ *
+ * @return the array of component standard deviations
+ */
+ public double[] getStandardDeviation() {
+ final double[] deviations = new double[k];
+ if (getN() < 1) {
+ // nothing added yet: the statistic is undefined
+ Arrays.fill(deviations, Double.NaN);
+ return deviations;
+ }
+ if (getN() < 2) {
+ // a single sample has no spread
+ Arrays.fill(deviations, 0.0);
+ return deviations;
+ }
+ final RealMatrix covariance = covarianceImpl.getResult();
+ for (int i = 0; i < k; ++i) {
+ deviations[i] = FastMath.sqrt(covariance.getEntry(i, i));
+ }
+ return deviations;
+ }
+
+ /**
+ * Returns the covariance matrix of the values that have been added.
+ *
+ * @return the covariance matrix
+ */
+ public RealMatrix getCovariance() {
+ return covarianceImpl.getResult();
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the maximum of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component maxima
+ */
+ public double[] getMax() {
+ return getResults(maxImpl);
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the minimum of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component minima
+ */
+ public double[] getMin() {
+ return getResults(minImpl);
+ }
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the geometric mean of the
+ * i<sup>th</sup> entries of the arrays that have been added using
+ * {@link #addValue(double[])}
+ *
+ * @return the array of component geometric means
+ */
+ public double[] getGeometricMean() {
+ return getResults(geoMeanImpl);
+ }
+
+ /**
+ * Builds a multi-line text report of the summary statistics computed from
+ * the values that have been added. Each statistic is written on its own
+ * line, terminated by the value of the <code>line.separator</code> system
+ * property.
+ *
+ * @return String with line feeds displaying statistics
+ */
+ @Override
+ public String toString() {
+ final String eol = System.getProperty("line.separator");
+ final String sep = ", ";
+ final StringBuilder report = new StringBuilder();
+ report.append("MultivariateSummaryStatistics:").append(eol);
+ report.append("n: ").append(getN()).append(eol);
+ append(report, getMin(), "min: ", sep, eol);
+ append(report, getMax(), "max: ", sep, eol);
+ append(report, getMean(), "mean: ", sep, eol);
+ append(report, getGeometricMean(), "geometric mean: ", sep, eol);
+ append(report, getSumSq(), "sum of squares: ", sep, eol);
+ append(report, getSumLog(), "sum of logarithms: ", sep, eol);
+ append(report, getStandardDeviation(), "standard deviation: ", sep, eol);
+ report.append("covariance: ").append(getCovariance().toString()).append(eol);
+ return report.toString();
+ }
+
+ /**
+ * Append a text representation of an array to a buffer.
+ * @param buffer buffer to fill
+ * @param data data array
+ * @param prefix text prefix
+ * @param separator elements separator
+ * @param suffix text suffix
+ */
+ private void append(StringBuilder buffer, double[] data,
+ String prefix, String separator, String suffix) {
+ buffer.append(prefix);
+ for (int i = 0; i < data.length; ++i) {
+ if (i > 0) {
+ buffer.append(separator);
+ }
+ buffer.append(data[i]);
+ }
+ buffer.append(suffix);
+ }
+
+ /**
+ * Resets all statistics and storage
+ */
+ public void clear() {
+ this.n = 0;
+ for (int i = 0; i < k; ++i) {
+ minImpl[i].clear();
+ maxImpl[i].clear();
+ sumImpl[i].clear();
+ sumLogImpl[i].clear();
+ sumSqImpl[i].clear();
+ geoMeanImpl[i].clear();
+ meanImpl[i].clear();
+ }
+ covarianceImpl.clear();
+ }
+
+ /**
+ * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
+ * instance and all statistics have the same values as this.
+ * <p>Array-valued statistics are compared with
+ * <code>MathUtils.equalsIncludingNaN</code>; the sample counts are compared
+ * directly as <code>long</code> values.</p>
+ *
+ * @param object the object to test equality against.
+ * @return true if object equals this
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (!(object instanceof MultivariateSummaryStatistics)) {
+ return false;
+ }
+ MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
+ // The counts are integers: compare them exactly instead of passing them to a
+ // floating-point comparison, which would widen them and could lose precision.
+ return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
+ MathUtils.equalsIncludingNaN(stat.getMax(), getMax()) &&
+ MathUtils.equalsIncludingNaN(stat.getMean(), getMean()) &&
+ MathUtils.equalsIncludingNaN(stat.getMin(), getMin()) &&
+ stat.getN() == getN() &&
+ MathUtils.equalsIncludingNaN(stat.getSum(), getSum()) &&
+ MathUtils.equalsIncludingNaN(stat.getSumSq(), getSumSq()) &&
+ MathUtils.equalsIncludingNaN(stat.getSumLog(), getSumLog()) &&
+ stat.getCovariance().equals(getCovariance());
+ }
+
+ /**
+ * Returns hash code based on values of statistics.
+ * <p>Each statistic (geometric mean, max, mean, min, n, sum, sum of squares,
+ * sum of logs and covariance) contributes exactly once.</p>
+ *
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ // The geometric mean used to be folded in twice, which only repeated the
+ // computation without adding any information to the hash.
+ int result = 31 + MathUtils.hash(getGeometricMean());
+ result = result * 31 + MathUtils.hash(getMax());
+ result = result * 31 + MathUtils.hash(getMean());
+ result = result * 31 + MathUtils.hash(getMin());
+ result = result * 31 + MathUtils.hash(getN());
+ result = result * 31 + MathUtils.hash(getSum());
+ result = result * 31 + MathUtils.hash(getSumSq());
+ result = result * 31 + MathUtils.hash(getSumLog());
+ result = result * 31 + getCovariance().hashCode();
+ return result;
+ }
+
+ // Getters and setters for statistics implementations
+ /**
+ * Sets statistics implementations.
+ * <p>Shared helper behind the public <code>setXxxImpl</code> methods. The
+ * elements of <code>newImpl</code> are copied into the existing
+ * <code>oldImpl</code> array; the <code>newImpl</code> array itself is not
+ * retained.</p>
+ *
+ * @param newImpl new implementations for statistics
+ * @param oldImpl old implementations for statistics (overwritten in place)
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e., if n > 0)
+ */
+ private void setImpl(StorelessUnivariateStatistic[] newImpl,
+ StorelessUnivariateStatistic[] oldImpl)
+ throws DimensionMismatchException, IllegalStateException {
+ // Reject reconfiguration once values have been added.
+ checkEmpty();
+ // The new array must have exactly one implementation per component.
+ checkDimension(newImpl.length);
+ // Overwrite the elements of the existing array rather than swapping the reference.
+ System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
+ }
+
+ /**
+ * Returns the currently configured Sum implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the sum
+ */
+ public StorelessUnivariateStatistic[] getSumImpl() {
+ return sumImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the Sum.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param sumImpl the StorelessUnivariateStatistic instance to use
+ * for computing the Sum
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
+ throws DimensionMismatchException {
+ setImpl(sumImpl, this.sumImpl);
+ }
+
+ /**
+ * Returns the currently configured sum of squares implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the sum of squares
+ */
+ public StorelessUnivariateStatistic[] getSumsqImpl() {
+ return sumSqImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the sum of squares.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param sumsqImpl the StorelessUnivariateStatistic instance to use
+ * for computing the sum of squares
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
+ throws DimensionMismatchException {
+ setImpl(sumsqImpl, this.sumSqImpl);
+ }
+
+ /**
+ * Returns the currently configured minimum implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the minimum
+ */
+ public StorelessUnivariateStatistic[] getMinImpl() {
+ return minImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the minimum.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param minImpl the StorelessUnivariateStatistic instance to use
+ * for computing the minimum
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
+ throws DimensionMismatchException {
+ setImpl(minImpl, this.minImpl);
+ }
+
+ /**
+ * Returns the currently configured maximum implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the maximum
+ */
+ public StorelessUnivariateStatistic[] getMaxImpl() {
+ return maxImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the maximum.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param maxImpl the StorelessUnivariateStatistic instance to use
+ * for computing the maximum
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
+ throws DimensionMismatchException {
+ setImpl(maxImpl, this.maxImpl);
+ }
+
+ /**
+ * Returns the currently configured sum of logs implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the log sum
+ */
+ public StorelessUnivariateStatistic[] getSumLogImpl() {
+ return sumLogImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the sum of logs.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param sumLogImpl the StorelessUnivariateStatistic instance to use
+ * for computing the log sum
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
+ throws DimensionMismatchException {
+ setImpl(sumLogImpl, this.sumLogImpl);
+ }
+
+ /**
+ * Returns the currently configured geometric mean implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the geometric mean
+ */
+ public StorelessUnivariateStatistic[] getGeoMeanImpl() {
+ return geoMeanImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the geometric mean.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
+ * for computing the geometric mean
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
+ throws DimensionMismatchException {
+ setImpl(geoMeanImpl, this.geoMeanImpl);
+ }
+
+ /**
+ * Returns the currently configured mean implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the mean
+ */
+ public StorelessUnivariateStatistic[] getMeanImpl() {
+ return meanImpl.clone();
+ }
+
+ /**
+ * <p>Sets the implementation for the mean.</p>
+ * <p>This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double[]) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param meanImpl the StorelessUnivariateStatistic instance to use
+ * for computing the mean
+ * @throws DimensionMismatchException if the array dimension
+ * does not match the one used at construction
+ * @throws IllegalStateException if data has already been added
+ * (i.e if n > 0)
+ */
+ public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
+ throws DimensionMismatchException {
+ setImpl(meanImpl, this.meanImpl);
+ }
+
+ /**
+ * Throws IllegalStateException if n > 0.
+ */
+ private void checkEmpty() {
+ if (n > 0) {
+ throw MathRuntimeException.createIllegalStateException(
+ LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
+ n);
+ }
+ }
+
+ /**
+ * Throws DimensionMismatchException if dimension != k.
+ * @param dimension dimension to check
+ * @throws DimensionMismatchException if dimension != k
+ */
+ private void checkDimension(int dimension)
+ throws DimensionMismatchException {
+ if (dimension != k) {
+ throw new DimensionMismatchException(dimension, k);
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalMultivariateSummary.java b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalMultivariateSummary.java
new file mode 100644
index 0000000..517788c
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalMultivariateSummary.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import org.apache.commons.math.linear.RealMatrix;
+
+/**
+ * Reporting interface for basic multivariate statistics.
+ *
+ * @since 1.2
+ * @version $Revision: 811786 $ $Date: 2009-09-06 11:36:08 +0200 (dim. 06 sept. 2009) $
+ */
+public interface StatisticalMultivariateSummary {
+
+ /**
+ * Returns the dimension of the data
+ * @return The dimension of the data
+ */
+ int getDimension();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * mean of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component means
+ */
+ double[] getMean();
+
+ /**
+ * Returns the covariance of the available values.
+ * @return The covariance, null if no multivariate samples
+ * have been added or a zeroed matrix for a single value set.
+ */
+ RealMatrix getCovariance();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * standard deviation of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component standard deviations
+ */
+ double[] getStandardDeviation();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * maximum of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component maxima
+ */
+ double[] getMax();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * minimum of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component minima
+ */
+ double[] getMin();
+
+ /**
+ * Returns the number of available values
+ * @return The number of available values
+ */
+ long getN();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * geometric mean of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component geometric means
+ */
+ double[] getGeometricMean();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * sum of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component sums
+ */
+ double[] getSum();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * sum of squares of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component sums of squares
+ */
+ double[] getSumSq();
+
+ /**
+ * Returns an array whose i<sup>th</sup> entry is the
+ * sum of logs of the i<sup>th</sup> entries of the arrays
+ * that correspond to each multivariate sample
+ *
+ * @return the array of component log sums
+ */
+ double[] getSumLog();
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummary.java b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummary.java
new file mode 100644
index 0000000..5592053
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummary.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+/**
+ * Reporting interface for basic univariate statistics.
+ *
+ * @version $Revision: 811786 $ $Date: 2009-09-06 11:36:08 +0200 (dim. 06 sept. 2009) $
+ */
+public interface StatisticalSummary {
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
+ * arithmetic mean </a> of the available values
+ * @return The mean or Double.NaN if no values have been added.
+ */
+ double getMean();
+ /**
+ * Returns the variance of the available values.
+ * @return The variance, Double.NaN if no values have been added
+ * or 0.0 for a single value set.
+ */
+ double getVariance();
+ /**
+ * Returns the standard deviation of the available values.
+ * @return The standard deviation, Double.NaN if no values have been added
+ * or 0.0 for a single value set.
+ */
+ double getStandardDeviation();
+ /**
+ * Returns the maximum of the available values
+ * @return The max or Double.NaN if no values have been added.
+ */
+ double getMax();
+ /**
+ * Returns the minimum of the available values
+ * @return The min or Double.NaN if no values have been added.
+ */
+ double getMin();
+ /**
+ * Returns the number of available values
+ * @return The number of available values
+ */
+ long getN();
+ /**
+ * Returns the sum of the values that have been added to Univariate.
+ * @return The sum or Double.NaN if no values have been added
+ */
+ double getSum();
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummaryValues.java b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummaryValues.java
new file mode 100644
index 0000000..e72639a
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/StatisticalSummaryValues.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.util.FastMath;
+import org.apache.commons.math.util.MathUtils;
+
+/**
+ * Value object representing the results of a univariate statistical summary.
+ *
+ * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
+ */
+public class StatisticalSummaryValues implements Serializable,
+ StatisticalSummary {
+
+ /** Serialization id */
+ private static final long serialVersionUID = -5108854841843722536L;
+
+ /** The sample mean */
+ private final double mean;
+
+ /** The sample variance */
+ private final double variance;
+
+ /** The number of observations in the sample */
+ private final long n;
+
+ /** The maximum value */
+ private final double max;
+
+ /** The minimum value */
+ private final double min;
+
+ /** The sum of the sample values */
+ private final double sum;
+
+ /**
+ * Constructor
+ *
+ * @param mean the sample mean
+ * @param variance the sample variance
+ * @param n the number of observations in the sample
+ * @param max the maximum value
+ * @param min the minimum value
+ * @param sum the sum of the values
+ */
+ public StatisticalSummaryValues(double mean, double variance, long n,
+ double max, double min, double sum) {
+ super();
+ this.mean = mean;
+ this.variance = variance;
+ this.n = n;
+ this.max = max;
+ this.min = min;
+ this.sum = sum;
+ }
+
+ /**
+ * @return Returns the max.
+ */
+ public double getMax() {
+ return max;
+ }
+
+ /**
+ * @return Returns the mean.
+ */
+ public double getMean() {
+ return mean;
+ }
+
+ /**
+ * @return Returns the min.
+ */
+ public double getMin() {
+ return min;
+ }
+
+ /**
+ * @return Returns the number of values.
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * @return Returns the sum.
+ */
+ public double getSum() {
+ return sum;
+ }
+
+ /**
+ * @return Returns the standard deviation
+ */
+ public double getStandardDeviation() {
+ return FastMath.sqrt(variance);
+ }
+
+ /**
+ * @return Returns the variance.
+ */
+ public double getVariance() {
+ return variance;
+ }
+
+ /**
+ * Returns true iff <code>object</code> is a
+ * <code>StatisticalSummaryValues</code> instance and all statistics have
+ * the same values as this.
+ * <p>Floating-point statistics are compared with
+ * <code>MathUtils.equalsIncludingNaN</code>; the sample counts are compared
+ * directly as <code>long</code> values.</p>
+ *
+ * @param object the object to test equality against.
+ * @return true if object equals this
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (!(object instanceof StatisticalSummaryValues)) {
+ return false;
+ }
+ StatisticalSummaryValues stat = (StatisticalSummaryValues) object;
+ // The counts are integers: compare them exactly instead of passing them to a
+ // floating-point comparison, which would widen them and could lose precision.
+ return MathUtils.equalsIncludingNaN(stat.getMax(), getMax()) &&
+ MathUtils.equalsIncludingNaN(stat.getMean(), getMean()) &&
+ MathUtils.equalsIncludingNaN(stat.getMin(), getMin()) &&
+ stat.getN() == getN() &&
+ MathUtils.equalsIncludingNaN(stat.getSum(), getSum()) &&
+ MathUtils.equalsIncludingNaN(stat.getVariance(), getVariance());
+ }
+
+ /**
+ * Returns hash code based on values of statistics
+ *
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ int result = 31 + MathUtils.hash(getMax());
+ result = result * 31 + MathUtils.hash(getMean());
+ result = result * 31 + MathUtils.hash(getMin());
+ result = result * 31 + MathUtils.hash(getN());
+ result = result * 31 + MathUtils.hash(getSum());
+ result = result * 31 + MathUtils.hash(getVariance());
+ return result;
+ }
+
+ /**
+ * Generates a text report displaying values of statistics.
+ * Each statistic is displayed on a separate line.
+ *
+ * @return String with line feeds displaying statistics
+ */
+ @Override
+ public String toString() {
+ StringBuilder outBuffer = new StringBuilder();
+ String endl = "\n";
+ outBuffer.append("StatisticalSummaryValues:").append(endl);
+ outBuffer.append("n: ").append(getN()).append(endl);
+ outBuffer.append("min: ").append(getMin()).append(endl);
+ outBuffer.append("max: ").append(getMax()).append(endl);
+ outBuffer.append("mean: ").append(getMean()).append(endl);
+ outBuffer.append("std dev: ").append(getStandardDeviation())
+ .append(endl);
+ outBuffer.append("variance: ").append(getVariance()).append(endl);
+ outBuffer.append("sum: ").append(getSum()).append(endl);
+ return outBuffer.toString();
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/StorelessUnivariateStatistic.java b/src/main/java/org/apache/commons/math/stat/descriptive/StorelessUnivariateStatistic.java
new file mode 100644
index 0000000..9b9fcb4
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/StorelessUnivariateStatistic.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+/**
+ * Extends the definition of {@link UnivariateStatistic} with
+ * {@link #increment} and {@link #incrementAll(double[])} methods for adding
+ * values and updating internal state.
+ * <p>
+ * This interface is designed to be used for calculating statistics that can be
+ * computed in one pass through the data without storing the full array of
+ * sample values.</p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public interface StorelessUnivariateStatistic extends UnivariateStatistic {
+
+ /**
+ * Updates the internal state of the statistic to reflect the addition of the new value.
+ * @param d the new value.
+ */
+ void increment(double d);
+
+ /**
+ * Updates the internal state of the statistic to reflect addition of
+ * all values in the values array. Does not clear the statistic first --
+ * i.e., the values are added <strong>incrementally</strong> to the dataset.
+ *
+ * @param values array holding the new values to add
+ * @throws IllegalArgumentException if the array is null
+ */
+ void incrementAll(double[] values);
+
+ /**
+ * Updates the internal state of the statistic to reflect addition of
+ * the values in the designated portion of the values array. Does not
+ * clear the statistic first -- i.e., the values are added
+ * <strong>incrementally</strong> to the dataset.
+ *
+ * @param values array holding the new values to add
+ * @param start the array index of the first value to add
+ * @param length the number of elements to add
+ * @throws IllegalArgumentException if the array is null or the index range given by start and length is not valid
+ */
+ void incrementAll(double[] values, int start, int length);
+
+ /**
+ * Returns the current value of the Statistic.
+ * @return value of the statistic, <code>Double.NaN</code> if it
+ * has been cleared or just instantiated.
+ */
+ double getResult();
+
+ /**
+ * Returns the number of values that have been added.
+ * @return the number of values.
+ */
+ long getN();
+
+ /**
+ * Clears the internal state of the Statistic
+ */
+ void clear();
+
+ /**
+ * Returns a copy of the statistic with the same internal state.
+ *
+ * @return a copy of the statistic
+ */
+ StorelessUnivariateStatistic copy();
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/SummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/SummaryStatistics.java
new file mode 100644
index 0000000..017a84d
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/SummaryStatistics.java
@@ -0,0 +1,717 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
+import org.apache.commons.math.stat.descriptive.moment.Mean;
+import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+import org.apache.commons.math.stat.descriptive.rank.Max;
+import org.apache.commons.math.stat.descriptive.rank.Min;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+import org.apache.commons.math.util.MathUtils;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * <p>
+ * Computes summary statistics for a stream of data values added using the
+ * {@link #addValue(double) addValue} method. The data values are not stored in
+ * memory, so this class can be used to compute statistics for very large data
+ * streams.
+ * </p>
+ * <p>
+ * The {@link StorelessUnivariateStatistic} instances used to maintain summary
+ * state and compute statistics are configurable via setters. For example, the
+ * default implementation for the variance can be overridden by calling
+ * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
+ * these methods must implement the {@link StorelessUnivariateStatistic}
+ * interface and configuration must be completed before <code>addValue</code>
+ * is called. No configuration is necessary to use the default, commons-math
+ * provided implementations.
+ * </p>
+ * <p>
+ * Note: This class is not thread-safe. Use
+ * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
+ * threads is required.
+ * </p>
+ * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 déc. 2010) $
+ */
+public class SummaryStatistics implements StatisticalSummary, Serializable {
+
+ /** Serialization UID */
+ private static final long serialVersionUID = -2021321786743555871L;
+
+ /** count of values that have been added */
+ protected long n = 0;
+
+ /** SecondMoment is used to compute the mean and variance */
+ protected SecondMoment secondMoment = new SecondMoment();
+
+ /** sum of values that have been added */
+ protected Sum sum = new Sum();
+
+ /** sum of the square of each value that has been added */
+ protected SumOfSquares sumsq = new SumOfSquares();
+
+ /** min of values that have been added */
+ protected Min min = new Min();
+
+ /** max of values that have been added */
+ protected Max max = new Max();
+
+ /** sumLog of values that have been added */
+ protected SumOfLogs sumLog = new SumOfLogs();
+
+ /** geoMean of values that have been added */
+ protected GeometricMean geoMean = new GeometricMean(sumLog);
+
+ /** mean of values that have been added */
+ protected Mean mean = new Mean();
+
+ /** variance of values that have been added */
+ protected Variance variance = new Variance();
+
+ /** Sum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic sumImpl = sum;
+
+ /** Sum of squares statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic sumsqImpl = sumsq;
+
+ /** Minimum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic minImpl = min;
+
+ /** Maximum statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic maxImpl = max;
+
+ /** Sum of log statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic sumLogImpl = sumLog;
+
+ /** Geometric mean statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic geoMeanImpl = geoMean;
+
+ /** Mean statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic meanImpl = mean;
+
+ /** Variance statistic implementation - can be reset by setter. */
+ private StorelessUnivariateStatistic varianceImpl = variance;
+
+ /**
+ * Construct a SummaryStatistics instance
+ */
+ public SummaryStatistics() {
+ }
+
+ /**
+ * A copy constructor. Creates a deep-copy of the {@code original}.
+ *
+ * @param original the {@code SummaryStatistics} instance to copy
+ */
+ public SummaryStatistics(SummaryStatistics original) {
+ copy(original, this);
+ }
+
+ /**
+ * Return a {@link StatisticalSummaryValues} instance reporting current
+ * statistics.
+ * @return Current values of statistics
+ */
+ public StatisticalSummary getSummary() {
+ return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
+ getMax(), getMin(), getSum());
+ }
+
+    /**
+     * Add a value to the data, updating every configured statistic and the count.
+     * @param value the value to add
+     */
+    public void addValue(double value) {
+        sumImpl.increment(value);
+        sumsqImpl.increment(value);
+        minImpl.increment(value);
+        maxImpl.increment(value);
+        sumLogImpl.increment(value);
+        secondMoment.increment(value);
+        // Default mean/variance are derived from secondMoment by the getters, and the default
+        // geomean wraps sumLog; any other instance set via a setter must be incremented here.
+        if (meanImpl != mean) {
+            meanImpl.increment(value);
+        }
+        if (varianceImpl != variance) {
+            varianceImpl.increment(value);
+        }
+        if (geoMeanImpl != geoMean) {
+            geoMeanImpl.increment(value);
+        }
+        n++;
+    }
+
+ /**
+ * Returns the number of available values
+ * @return The number of available values
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * Returns the sum of the values that have been added
+ * @return The sum or <code>Double.NaN</code> if no values have been added
+ */
+ public double getSum() {
+ return sumImpl.getResult();
+ }
+
+ /**
+ * Returns the sum of the squares of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return The sum of squares
+ */
+ public double getSumsq() {
+ return sumsqImpl.getResult();
+ }
+
+ /**
+ * Returns the mean of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the mean
+ */
+ public double getMean() {
+ if (mean == meanImpl) {
+ return new Mean(secondMoment).getResult();
+ } else {
+ return meanImpl.getResult();
+ }
+ }
+
+    /**
+     * Returns the standard deviation of the values that have been added.
+     * <p>
+     * The result is Double.NaN when no values have been added, 0 when exactly
+     * one value has been added, and the square root of the variance otherwise.</p>
+     * @return the standard deviation
+     */
+    public double getStandardDeviation() {
+        final long count = getN();
+        if (count <= 0) {
+            // empty data set
+            return Double.NaN;
+        }
+        if (count == 1) {
+            return 0.0;
+        }
+        return FastMath.sqrt(getVariance());
+    }
+
+ /**
+ * Returns the variance of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the variance
+ */
+ public double getVariance() {
+ if (varianceImpl == variance) {
+ return new Variance(secondMoment).getResult();
+ } else {
+ return varianceImpl.getResult();
+ }
+ }
+
+ /**
+ * Returns the maximum of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the maximum
+ */
+ public double getMax() {
+ return maxImpl.getResult();
+ }
+
+ /**
+ * Returns the minimum of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the minimum
+ */
+ public double getMin() {
+ return minImpl.getResult();
+ }
+
+ /**
+ * Returns the geometric mean of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the geometric mean
+ */
+ public double getGeometricMean() {
+ return geoMeanImpl.getResult();
+ }
+
+ /**
+ * Returns the sum of the logs of the values that have been added.
+ * <p>
+ * Double.NaN is returned if no values have been added.
+ * </p>
+ * @return the sum of logs
+ * @since 1.2
+ */
+ public double getSumOfLogs() {
+ return sumLogImpl.getResult();
+ }
+
+ /**
+ * Returns a statistic related to the Second Central Moment. Specifically,
+ * what is returned is the sum of squared deviations from the sample mean
+ * among the values that have been added.
+ * <p>
+ * Returns <code>Double.NaN</code> if no data values have been added and
+ * returns <code>0</code> if there is just one value in the data set.</p>
+ *
+ * @return second central moment statistic
+ * @since 2.0
+ */
+ public double getSecondMoment() {
+ return secondMoment.getResult();
+ }
+
+ /**
+ * Generates a text report displaying summary statistics from values that
+ * have been added.
+ * @return String with line feeds displaying statistics
+ * @since 1.2
+ */
+ @Override
+ public String toString() {
+ StringBuilder outBuffer = new StringBuilder();
+ String endl = "\n";
+ outBuffer.append("SummaryStatistics:").append(endl);
+ outBuffer.append("n: ").append(getN()).append(endl);
+ outBuffer.append("min: ").append(getMin()).append(endl);
+ outBuffer.append("max: ").append(getMax()).append(endl);
+ outBuffer.append("mean: ").append(getMean()).append(endl);
+ outBuffer.append("geometric mean: ").append(getGeometricMean())
+ .append(endl);
+ outBuffer.append("variance: ").append(getVariance()).append(endl);
+ outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
+ outBuffer.append("standard deviation: ").append(getStandardDeviation())
+ .append(endl);
+ outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
+ return outBuffer.toString();
+ }
+
+ /**
+ * Resets all statistics and storage
+ */
+ public void clear() {
+ this.n = 0;
+ minImpl.clear();
+ maxImpl.clear();
+ sumImpl.clear();
+ sumLogImpl.clear();
+ sumsqImpl.clear();
+ geoMeanImpl.clear();
+ secondMoment.clear();
+ if (meanImpl != mean) {
+ meanImpl.clear();
+ }
+ if (varianceImpl != variance) {
+ varianceImpl.clear();
+ }
+ }
+
+ /**
+ * Returns true iff <code>object</code> is a
+ * <code>SummaryStatistics</code> instance and all statistics have the
+ * same values as this.
+ * @param object the object to test equality against.
+ * @return true if object equals this
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (object instanceof SummaryStatistics == false) {
+ return false;
+ }
+ SummaryStatistics stat = (SummaryStatistics)object;
+ return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
+ MathUtils.equalsIncludingNaN(stat.getMax(), getMax()) &&
+ MathUtils.equalsIncludingNaN(stat.getMean(), getMean()) &&
+ MathUtils.equalsIncludingNaN(stat.getMin(), getMin()) &&
+ MathUtils.equalsIncludingNaN(stat.getN(), getN()) &&
+ MathUtils.equalsIncludingNaN(stat.getSum(), getSum()) &&
+ MathUtils.equalsIncludingNaN(stat.getSumsq(), getSumsq()) &&
+ MathUtils.equalsIncludingNaN(stat.getVariance(), getVariance());
+ }
+
+ /**
+ * Returns hash code based on values of statistics
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ int result = 31 + MathUtils.hash(getGeometricMean());
+ result = result * 31 + MathUtils.hash(getGeometricMean());
+ result = result * 31 + MathUtils.hash(getMax());
+ result = result * 31 + MathUtils.hash(getMean());
+ result = result * 31 + MathUtils.hash(getMin());
+ result = result * 31 + MathUtils.hash(getN());
+ result = result * 31 + MathUtils.hash(getSum());
+ result = result * 31 + MathUtils.hash(getSumsq());
+ result = result * 31 + MathUtils.hash(getVariance());
+ return result;
+ }
+
+ // Getters and setters for statistics implementations
+ /**
+ * Returns the currently configured Sum implementation
+ * @return the StorelessUnivariateStatistic implementing the sum
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getSumImpl() {
+ return sumImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the Sum.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param sumImpl the StorelessUnivariateStatistic instance to use for
+ * computing the Sum
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
+ checkEmpty();
+ this.sumImpl = sumImpl;
+ }
+
+ /**
+ * Returns the currently configured sum of squares implementation
+ * @return the StorelessUnivariateStatistic implementing the sum of squares
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getSumsqImpl() {
+ return sumsqImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the sum of squares.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
+ * computing the sum of squares
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
+ checkEmpty();
+ this.sumsqImpl = sumsqImpl;
+ }
+
+ /**
+ * Returns the currently configured minimum implementation
+ * @return the StorelessUnivariateStatistic implementing the minimum
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getMinImpl() {
+ return minImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the minimum.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param minImpl the StorelessUnivariateStatistic instance to use for
+ * computing the minimum
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setMinImpl(StorelessUnivariateStatistic minImpl) {
+ checkEmpty();
+ this.minImpl = minImpl;
+ }
+
+ /**
+ * Returns the currently configured maximum implementation
+ * @return the StorelessUnivariateStatistic implementing the maximum
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getMaxImpl() {
+ return maxImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the maximum.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param maxImpl the StorelessUnivariateStatistic instance to use for
+ * computing the maximum
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
+ checkEmpty();
+ this.maxImpl = maxImpl;
+ }
+
+ /**
+ * Returns the currently configured sum of logs implementation
+ * @return the StorelessUnivariateStatistic implementing the log sum
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getSumLogImpl() {
+ return sumLogImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the sum of logs.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
+ * computing the log sum
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
+ checkEmpty();
+ this.sumLogImpl = sumLogImpl;
+ geoMean.setSumLogImpl(sumLogImpl);
+ }
+
+ /**
+ * Returns the currently configured geometric mean implementation
+ * @return the StorelessUnivariateStatistic implementing the geometric mean
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getGeoMeanImpl() {
+ return geoMeanImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the geometric mean.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
+ * computing the geometric mean
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
+ checkEmpty();
+ this.geoMeanImpl = geoMeanImpl;
+ }
+
+ /**
+ * Returns the currently configured mean implementation
+ * @return the StorelessUnivariateStatistic implementing the mean
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getMeanImpl() {
+ return meanImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the mean.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param meanImpl the StorelessUnivariateStatistic instance to use for
+ * computing the mean
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
+ checkEmpty();
+ this.meanImpl = meanImpl;
+ }
+
+ /**
+ * Returns the currently configured variance implementation
+ * @return the StorelessUnivariateStatistic implementing the variance
+ * @since 1.2
+ */
+ public StorelessUnivariateStatistic getVarianceImpl() {
+ return varianceImpl;
+ }
+
+ /**
+ * <p>
+ * Sets the implementation for the variance.
+ * </p>
+ * <p>
+ * This method must be activated before any data has been added - i.e.,
+ * before {@link #addValue(double) addValue} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.
+ * </p>
+ * @param varianceImpl the StorelessUnivariateStatistic instance to use for
+ * computing the variance
+ * @throws IllegalStateException if data has already been added (i.e if n >
+ * 0)
+ * @since 1.2
+ */
+ public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
+ checkEmpty();
+ this.varianceImpl = varianceImpl;
+ }
+
+ /**
+ * Throws IllegalStateException if n > 0.
+ */
+ private void checkEmpty() {
+ if (n > 0) {
+ throw MathRuntimeException.createIllegalStateException(
+ LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
+ n);
+ }
+ }
+
+ /**
+ * Returns a copy of this SummaryStatistics instance with the same internal state.
+ *
+ * @return a copy of this
+ */
+ public SummaryStatistics copy() {
+ SummaryStatistics result = new SummaryStatistics();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source SummaryStatistics to copy
+ * @param dest SummaryStatistics to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SummaryStatistics source, SummaryStatistics dest) {
+ dest.maxImpl = source.maxImpl.copy();
+ dest.meanImpl = source.meanImpl.copy();
+ dest.minImpl = source.minImpl.copy();
+ dest.sumImpl = source.sumImpl.copy();
+ dest.varianceImpl = source.varianceImpl.copy();
+ dest.sumLogImpl = source.sumLogImpl.copy();
+ dest.sumsqImpl = source.sumsqImpl.copy();
+ if (source.getGeoMeanImpl() instanceof GeometricMean) {
+ // Keep geoMeanImpl, sumLogImpl in synch
+ dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
+ } else {
+ dest.geoMeanImpl = source.geoMeanImpl.copy();
+ }
+ SecondMoment.copy(source.secondMoment, dest.secondMoment);
+ dest.n = source.n;
+
+ // Make sure that if stat == statImpl in source, same
+ // holds in dest; otherwise copy stat
+ if (source.geoMean == source.geoMeanImpl) {
+ dest.geoMean = (GeometricMean) dest.geoMeanImpl;
+ } else {
+ GeometricMean.copy(source.geoMean, dest.geoMean);
+ }
+ if (source.max == source.maxImpl) {
+ dest.max = (Max) dest.maxImpl;
+ } else {
+ Max.copy(source.max, dest.max);
+ }
+ if (source.mean == source.meanImpl) {
+ dest.mean = (Mean) dest.meanImpl;
+ } else {
+ Mean.copy(source.mean, dest.mean);
+ }
+ if (source.min == source.minImpl) {
+ dest.min = (Min) dest.minImpl;
+ } else {
+ Min.copy(source.min, dest.min);
+ }
+ if (source.sum == source.sumImpl) {
+ dest.sum = (Sum) dest.sumImpl;
+ } else {
+ Sum.copy(source.sum, dest.sum);
+ }
+ if (source.variance == source.varianceImpl) {
+ dest.variance = (Variance) dest.varianceImpl;
+ } else {
+ Variance.copy(source.variance, dest.variance);
+ }
+ if (source.sumLog == source.sumLogImpl) {
+ dest.sumLog = (SumOfLogs) dest.sumLogImpl;
+ } else {
+ SumOfLogs.copy(source.sumLog, dest.sumLog);
+ }
+ if (source.sumsq == source.sumsqImpl) {
+ dest.sumsq = (SumOfSquares) dest.sumsqImpl;
+ } else {
+ SumOfSquares.copy(source.sumsq, dest.sumsq);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedDescriptiveStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedDescriptiveStatistics.java
new file mode 100644
index 0000000..f1a932d
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedDescriptiveStatistics.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+/**
+ * Implementation of
+ * {@link org.apache.commons.math.stat.descriptive.DescriptiveStatistics} that
+ * is safe to use in a multithreaded environment. Multiple threads can safely
+ * operate on a single instance without causing runtime exceptions due to race
+ * conditions. In effect, this implementation makes modification and access
+ * methods atomic operations for a single instance. That is to say, as one
+ * thread is computing a statistic from the instance, no other thread can modify
+ * the instance nor compute another statistic.
+ *
+ * @since 1.2
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class SynchronizedDescriptiveStatistics extends DescriptiveStatistics {
+
+ /** Serialization UID */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Construct an instance with infinite window
+ */
+ public SynchronizedDescriptiveStatistics() {
+ this(INFINITE_WINDOW);
+ }
+
+ /**
+ * Construct an instance with finite window
+ * @param window the finite window size.
+ */
+ public SynchronizedDescriptiveStatistics(int window) {
+ super(window);
+ }
+
+ /**
+ * A copy constructor. Creates a deep-copy of the {@code original}.
+ *
+ * @param original the {@code SynchronizedDescriptiveStatistics} instance to copy
+ */
+ public SynchronizedDescriptiveStatistics(SynchronizedDescriptiveStatistics original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void addValue(double v) {
+ super.addValue(v);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double apply(UnivariateStatistic stat) {
+ return super.apply(stat);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void clear() {
+ super.clear();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getElement(int index) {
+ return super.getElement(index);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized long getN() {
+ return super.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getStandardDeviation() {
+ return super.getStandardDeviation();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getValues() {
+ return super.getValues();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized int getWindowSize() {
+ return super.getWindowSize();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setWindowSize(int windowSize) {
+ super.setWindowSize(windowSize);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized String toString() {
+ return super.toString();
+ }
+
+ /**
+ * Returns a copy of this SynchronizedDescriptiveStatistics instance with the
+ * same internal state.
+ *
+ * @return a copy of this
+ */
+ @Override
+ public synchronized SynchronizedDescriptiveStatistics copy() {
+ SynchronizedDescriptiveStatistics result =
+ new SynchronizedDescriptiveStatistics();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>Acquires synchronization lock on source, then dest before copying.</p>
+ *
+ * @param source SynchronizedDescriptiveStatistics to copy
+ * @param dest SynchronizedDescriptiveStatistics to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SynchronizedDescriptiveStatistics source,
+ SynchronizedDescriptiveStatistics dest) {
+ synchronized (source) { // source's monitor is always taken first, dest's second
+ synchronized (dest) { // NOTE(review): concurrent copy(a, b) and copy(b, a) would lock in opposite order and could deadlock -- confirm callers avoid this
+ DescriptiveStatistics.copy(source, dest);
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java
new file mode 100644
index 0000000..190e092
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+import org.apache.commons.math.DimensionMismatchException;
+import org.apache.commons.math.linear.RealMatrix;
+
+/**
+ * Implementation of
+ * {@link org.apache.commons.math.stat.descriptive.MultivariateSummaryStatistics} that
+ * is safe to use in a multithreaded environment. Multiple threads can safely
+ * operate on a single instance without causing runtime exceptions due to race
+ * conditions. In effect, this implementation makes modification and access
+ * methods atomic operations for a single instance. That is to say, as one
+ * thread is computing a statistic from the instance, no other thread can modify
+ * the instance nor compute another statistic.
+ * @since 1.2
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class SynchronizedMultivariateSummaryStatistics
+ extends MultivariateSummaryStatistics {
+
+ /** Serialization UID */
+ private static final long serialVersionUID = 7099834153347155363L;
+
+ /**
+ * Construct a SynchronizedMultivariateSummaryStatistics instance
+ * @param k dimension of the data
+ * @param isCovarianceBiasCorrected if true, the unbiased sample
+ * covariance is computed, otherwise the biased population covariance
+ * is computed
+ */
+ public SynchronizedMultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
+ super(k, isCovarianceBiasCorrected);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void addValue(double[] value)
+ throws DimensionMismatchException {
+ super.addValue(value);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized int getDimension() {
+ return super.getDimension();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized long getN() {
+ return super.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getSum() {
+ return super.getSum();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getSumSq() {
+ return super.getSumSq();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getSumLog() {
+ return super.getSumLog();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getMean() {
+ return super.getMean();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getStandardDeviation() {
+ return super.getStandardDeviation();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized RealMatrix getCovariance() {
+ return super.getCovariance();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getMax() {
+ return super.getMax();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getMin() {
+ return super.getMin();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double[] getGeometricMean() {
+ return super.getGeometricMean();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized String toString() {
+ return super.toString();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void clear() {
+ super.clear();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized boolean equals(Object object) {
+ return super.equals(object);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized int hashCode() {
+ return super.hashCode();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getSumImpl() {
+ return super.getSumImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
+ throws DimensionMismatchException {
+ super.setSumImpl(sumImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getSumsqImpl() {
+ return super.getSumsqImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
+ throws DimensionMismatchException {
+ super.setSumsqImpl(sumsqImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getMinImpl() {
+ return super.getMinImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMinImpl(StorelessUnivariateStatistic[] minImpl)
+ throws DimensionMismatchException {
+ super.setMinImpl(minImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getMaxImpl() {
+ return super.getMaxImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
+ throws DimensionMismatchException {
+ super.setMaxImpl(maxImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getSumLogImpl() {
+ return super.getSumLogImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
+ throws DimensionMismatchException {
+ super.setSumLogImpl(sumLogImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getGeoMeanImpl() {
+ return super.getGeoMeanImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
+ throws DimensionMismatchException {
+ super.setGeoMeanImpl(geoMeanImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic[] getMeanImpl() {
+ return super.getMeanImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
+ throws DimensionMismatchException {
+ super.setMeanImpl(meanImpl);
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedSummaryStatistics.java
new file mode 100644
index 0000000..07bfbf2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/SynchronizedSummaryStatistics.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+/**
+ * Implementation of
+ * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics} that
+ * is safe to use in a multithreaded environment. Multiple threads can safely
+ * operate on a single instance without causing runtime exceptions due to race
+ * conditions. In effect, this implementation makes modification and access
+ * methods atomic operations for a single instance. That is to say, as one
+ * thread is computing a statistic from the instance, no other thread can modify
+ * the instance nor compute another statistic.
+ *
+ * @since 1.2
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class SynchronizedSummaryStatistics extends SummaryStatistics {
+
+ /** Serialization UID */
+ private static final long serialVersionUID = 1909861009042253704L;
+
+ /**
+ * Construct a SynchronizedSummaryStatistics instance
+ */
+ public SynchronizedSummaryStatistics() {
+ super();
+ }
+
+ /**
+ * A copy constructor. Creates a deep-copy of the {@code original}.
+ *
+ * @param original the {@code SynchronizedSummaryStatistics} instance to copy
+ */
+ public SynchronizedSummaryStatistics(SynchronizedSummaryStatistics original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StatisticalSummary getSummary() {
+ return super.getSummary();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void addValue(double value) {
+ super.addValue(value);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized long getN() {
+ return super.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getSum() {
+ return super.getSum();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getSumsq() {
+ return super.getSumsq();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getMean() {
+ return super.getMean();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getStandardDeviation() {
+ return super.getStandardDeviation();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getVariance() {
+ return super.getVariance();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getMax() {
+ return super.getMax();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getMin() {
+ return super.getMin();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized double getGeometricMean() {
+ return super.getGeometricMean();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized String toString() {
+ return super.toString();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void clear() {
+ super.clear();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized boolean equals(Object object) {
+ return super.equals(object);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized int hashCode() {
+ return super.hashCode();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getSumImpl() {
+ return super.getSumImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumImpl(StorelessUnivariateStatistic sumImpl) {
+ super.setSumImpl(sumImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getSumsqImpl() {
+ return super.getSumsqImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
+ super.setSumsqImpl(sumsqImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getMinImpl() {
+ return super.getMinImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMinImpl(StorelessUnivariateStatistic minImpl) {
+ super.setMinImpl(minImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getMaxImpl() {
+ return super.getMaxImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
+ super.setMaxImpl(maxImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getSumLogImpl() {
+ return super.getSumLogImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
+ super.setSumLogImpl(sumLogImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getGeoMeanImpl() {
+ return super.getGeoMeanImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
+ super.setGeoMeanImpl(geoMeanImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getMeanImpl() {
+ return super.getMeanImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
+ super.setMeanImpl(meanImpl);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized StorelessUnivariateStatistic getVarianceImpl() {
+ return super.getVarianceImpl();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public synchronized void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
+ super.setVarianceImpl(varianceImpl);
+ }
+
+ /**
+ * Returns a copy of this SynchronizedSummaryStatistics instance with the
+ * same internal state.
+ *
+ * @return a copy of this
+ */
+ @Override
+ public synchronized SynchronizedSummaryStatistics copy() {
+ SynchronizedSummaryStatistics result =
+ new SynchronizedSummaryStatistics();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>Acquires synchronization lock on source, then dest before copying.</p>
+ *
+ * @param source SynchronizedSummaryStatistics to copy
+ * @param dest SynchronizedSummaryStatistics to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SynchronizedSummaryStatistics source,
+ SynchronizedSummaryStatistics dest) {
+ synchronized (source) { // source's monitor is taken first; NOTE(review): copy(a, b) racing copy(b, a) could deadlock on this fixed order - confirm callers avoid it
+ synchronized (dest) { // dest's monitor is taken while source's is still held
+ SummaryStatistics.copy(source, dest); // delegate the state transfer while both monitors are held
+ }
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/UnivariateStatistic.java b/src/main/java/org/apache/commons/math/stat/descriptive/UnivariateStatistic.java
new file mode 100644
index 0000000..92c9ee2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/UnivariateStatistic.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+
+/**
+ * Base interface implemented by all statistics.
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public interface UnivariateStatistic {
+
+ /**
+ * Returns the result of evaluating the statistic over the input array.
+ *
+ * @param values input array
+ * @return the value of the statistic applied to the input array
+ */
+ double evaluate(double[] values);
+
+ /**
+ * Returns the result of evaluating the statistic over the specified entries
+ * in the input array.
+ *
+ * @param values the input array
+ * @param begin the index of the first element to include
+ * @param length the number of elements to include
+ * @return the value of the statistic applied to the included array entries
+ */
+ double evaluate(double[] values, int begin, int length);
+
+ /**
+ * Returns a copy of the statistic with the same internal state.
+ *
+ * @return a copy of the statistic
+ */
+ UnivariateStatistic copy();
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/WeightedEvaluation.java b/src/main/java/org/apache/commons/math/stat/descriptive/WeightedEvaluation.java
new file mode 100644
index 0000000..54a0216
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/WeightedEvaluation.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive;
+
+/**
+ * Weighted evaluation for statistics.
+ *
+ * @since 2.1
+ * @version $Revision: 894474 $ $Date: 2009-12-29 21:02:37 +0100 (mar. 29 déc. 2009) $
+ */
+public interface WeightedEvaluation {
+
+ /**
+ * Returns the result of evaluating the statistic over the input array,
+ * using the supplied weights.
+ *
+ * @param values input array
+ * @param weights array of weights
+ * @return the value of the weighted statistic applied to the input array
+ */
+ double evaluate(double[] values, double[] weights);
+
+ /**
+ * Returns the result of evaluating the statistic over the specified entries
+ * in the input array, using corresponding entries in the supplied weights array.
+ *
+ * @param values the input array
+ * @param weights array of weights
+ * @param begin the index of the first element to include
+ * @param length the number of elements to include
+ * @return the value of the weighted statistic applied to the included array entries
+ */
+ double evaluate(double[] values, double[] weights, int begin, int length);
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/FirstMoment.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/FirstMoment.java
new file mode 100644
index 0000000..4103e50
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/FirstMoment.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+/**
+ * Computes the first moment (arithmetic mean). Uses the definitional formula:
+ * <p>
+ * mean = sum(x_i) / n </p>
+ * <p>
+ * where <code>n</code> is the number of observations. </p>
+ * <p>
+ * To limit numeric errors, the value of the statistic is computed using the
+ * following recursive updating algorithm: </p>
+ * <p>
+ * <ol>
+ * <li>Initialize <code>m = </code> the first value</li>
+ * <li>For each additional value, update using <br>
+ * <code>m = m + (new value - m) / (number of observations)</code></li>
+ * </ol></p>
+ * <p>
+ * Returns <code>Double.NaN</code> if the dataset is empty.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class FirstMoment extends AbstractStorelessUnivariateStatistic
+ implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 6112755307178490473L;
+
+
+ /** Count of values that have been added */
+ protected long n;
+
+ /** First moment of values that have been added */
+ protected double m1;
+
+ /**
+ * Deviation of most recently added value from previous first moment.
+ * Retained to prevent repeated computation in higher order moments.
+ */
+ protected double dev;
+
+ /**
+ * Deviation of most recently added value from previous first moment,
+ * normalized by the sample size including that value. Retained to prevent repeated
+ * computation in higher order moments
+ */
+ protected double nDev;
+
+ /**
+ * Create a FirstMoment instance
+ */
+ public FirstMoment() {
+ n = 0;
+ m1 = Double.NaN;
+ dev = Double.NaN;
+ nDev = Double.NaN;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code FirstMoment} identical
+ * to the {@code original}
+ *
+ * @param original the {@code FirstMoment} instance to copy
+ */
+ public FirstMoment(FirstMoment original) {
+ super();
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ if (n == 0) { // first value: swap the NaN placeholder for 0 so the running update starts from a number
+ m1 = 0.0;
+ }
+ n++;
+ double n0 = n; // count (already including d) as a double for the division below
+ dev = d - m1; // deviation of d from the mean of the values seen before it
+ nDev = dev / n0; // that deviation divided by the updated count
+ m1 += nDev; // m = m + (new value - m) / (number of observations)
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ m1 = Double.NaN;
+ n = 0;
+ dev = Double.NaN;
+ nDev = Double.NaN;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ return m1;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public FirstMoment copy() {
+ FirstMoment result = new FirstMoment();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source FirstMoment to copy
+ * @param dest FirstMoment to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(FirstMoment source, FirstMoment dest) {
+ dest.setData(source.getDataRef()); // NOTE(review): looks like the stored-data reference is handed over as-is, not cloned - confirm against getDataRef()
+ dest.n = source.n; // the remaining fields are primitives, copied by value
+ dest.m1 = source.m1;
+ dest.dev = source.dev;
+ dest.nDev = source.nDev;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/FourthMoment.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/FourthMoment.java
new file mode 100644
index 0000000..6e7d8d2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/FourthMoment.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+/**
+ * Computes a statistic related to the Fourth Central Moment. Specifically,
+ * what is computed is the sum of
+ * <p>
+ * (x_i - xbar) ^ 4, </p>
+ * <p>
+ * where the x_i are the
+ * sample observations and xbar is the sample mean. </p>
+ * <p>
+ * The following recursive updating formula is used: </p>
+ * <p>
+ * Let <ul>
+ * <li> dev = (current obs - previous mean) </li>
+ * <li> m2 = previous value of {@link SecondMoment} </li>
+ * <li> m3 = previous value of {@link ThirdMoment} </li>
+ * <li> n = number of observations (including current obs) </li>
+ * </ul>
+ * Then </p>
+ * <p>
+ * new value = old value - 4 * (dev/n) * m3 + 6 * (dev/n)^2 * m2 + <br>
+ * [n^2 - 3 * (n-1)] * dev^4 * (n-1) / n^3 </p>
+ * <p>
+ * Returns <code>Double.NaN</code> if no data values have been added and
+ * returns <code>0</code> if there is just one value in the data set. </p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally. </p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class FourthMoment extends ThirdMoment implements Serializable{
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 4763990447117157611L;
+
+ /** fourth moment of values that have been added */
+ protected double m4;
+
+ /**
+ * Create a FourthMoment instance
+ */
+ public FourthMoment() {
+ super();
+ m4 = Double.NaN;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code FourthMoment} identical
+ * to the {@code original}
+ *
+ * @param original the {@code FourthMoment} instance to copy
+ */
+ public FourthMoment(FourthMoment original) {
+ super();
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ if (n < 1) { // no values yet: start every moment accumulator from zero
+ m4 = 0.0;
+ m3 = 0.0;
+ m2 = 0.0;
+ m1 = 0.0;
+ }
+
+ double prevM3 = m3; // snapshot taken before super.increment(d); the m4 update below needs the pre-update value
+ double prevM2 = m2; // same for the second-moment accumulator
+
+ super.increment(d); // presumably refreshes n, nDev and nDevSq, which are read below - ThirdMoment is not shown here, confirm
+
+ double n0 = n; // count as a double for the floating-point arithmetic below
+
+ m4 = m4 - 4.0 * nDev * prevM3 + 6.0 * nDevSq * prevM2 +
+ ((n0 * n0) - 3 * (n0 -1)) * (nDevSq * nDevSq * (n0 - 1) * n0); // assuming nDevSq == (dev/n)^2, the last factor is dev^4 * (n-1) / n^3 - TODO confirm in SecondMoment
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ return m4;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ m4 = Double.NaN;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public FourthMoment copy() {
+ FourthMoment result = new FourthMoment();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source FourthMoment to copy
+ * @param dest FourthMoment to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(FourthMoment source, FourthMoment dest) {
+ ThirdMoment.copy(source, dest);
+ dest.m4 = source.m4;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/GeometricMean.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/GeometricMean.java
new file mode 100644
index 0000000..a24a3c8
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/GeometricMean.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic;
+import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
+ * geometric mean </a> of the available values.
+ * <p>
+ * Uses a {@link SumOfLogs} instance to compute sum of logs and returns
+ * <code> exp( 1/n (sum of logs) ).</code> Therefore, </p>
+ * <ul>
+ * <li>If any of the values are &lt; 0, the result is <code>NaN.</code></li>
+ * <li>If all values are non-negative and less than
+ * <code>Double.POSITIVE_INFINITY</code>, but at least one value is 0, the
+ * result is <code>0.</code></li>
+ * <li>If both <code>Double.POSITIVE_INFINITY</code> and
+ * <code>Double.NEGATIVE_INFINITY</code> are among the values, the result is
+ * <code>NaN.</code></li>
+ * </ul>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class GeometricMean extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -8178734905303459453L;
+
+ /** Wrapped SumOfLogs instance */
+ private StorelessUnivariateStatistic sumOfLogs;
+
+ /**
+ * Create a GeometricMean instance
+ */
+ public GeometricMean() {
+ sumOfLogs = new SumOfLogs();
+ }
+
+ /**
+ * Copy constructor, creates a new {@code GeometricMean} identical
+ * to the {@code original}
+ *
+ * @param original the {@code GeometricMean} instance to copy
+ */
+ public GeometricMean(GeometricMean original) {
+ super();
+ copy(original, this);
+ }
+
+ /**
+ * Create a GeometricMean instance using the given SumOfLogs instance
+ * @param sumOfLogs sum of logs instance to use for computation
+ */
+ public GeometricMean(SumOfLogs sumOfLogs) {
+ this.sumOfLogs = sumOfLogs;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public GeometricMean copy() {
+ GeometricMean result = new GeometricMean();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ sumOfLogs.increment(d);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ if (sumOfLogs.getN() > 0) {
+ return FastMath.exp(sumOfLogs.getResult() / sumOfLogs.getN());
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ sumOfLogs.clear();
+ }
+
+ /**
+ * Returns the geometric mean of the entries in the specified portion
+ * of the input array.
+ * <p>
+ * See {@link GeometricMean} for details on the computing algorithm.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values input array containing the values
+ * @param begin first array element to include
+ * @param length the number of elements to include
+ * @return the geometric mean or Double.NaN if length = 0 or
+ * any of the values are &lt; 0.
+ * @throws IllegalArgumentException if the input array is null or the array
+ * index parameters are not valid
+ */
+ @Override
+ public double evaluate(
+ final double[] values, final int begin, final int length) {
+ return FastMath.exp(
+ sumOfLogs.evaluate(values, begin, length) / length);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return sumOfLogs.getN();
+ }
+
+ /**
+ * <p>Sets the implementation for the sum of logs.</p>
+ * <p>This method must be called before any data has been added - i.e.,
+ * before {@link #increment(double) increment} has been used to add data;
+ * otherwise an IllegalStateException will be thrown.</p>
+ *
+ * @param sumLogImpl the StorelessUnivariateStatistic instance to use
+ * for computing the log sum
+ * @throws IllegalStateException if data has already been added
+ * (i.e. if n &gt; 0)
+ */
+ public void setSumLogImpl(
+ StorelessUnivariateStatistic sumLogImpl) {
+ checkEmpty();
+ this.sumOfLogs = sumLogImpl;
+ }
+
+ /**
+ * Returns the currently configured sum of logs implementation
+ *
+ * @return the StorelessUnivariateStatistic implementing the log sum
+ */
+ public StorelessUnivariateStatistic getSumLogImpl() {
+ return sumOfLogs;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source GeometricMean to copy
+ * @param dest GeometricMean to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(GeometricMean source, GeometricMean dest) {
+ dest.setData(source.getDataRef()); // NOTE(review): looks like the stored-data reference is handed over as-is, not cloned - confirm against getDataRef()
+ dest.sumOfLogs = source.sumOfLogs.copy(); // dest receives the result of copy(), not source's own wrapped statistic
+ }
+
+
+ /**
+ * Throws IllegalStateException if n > 0.
+ */
+ private void checkEmpty() {
+ if (getN() > 0) {
+ throw MathRuntimeException.createIllegalStateException(
+ LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
+ getN());
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Kurtosis.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Kurtosis.java
new file mode 100644
index 0000000..f648051
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Kurtosis.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.util.FastMath;
+
+
+/**
+ * Computes the Kurtosis of the available values.
+ * <p>
+ * We use the following (unbiased) formula to define kurtosis:</p>
+ * <p>
+ * kurtosis = { [n(n+1) / (n -1)(n - 2)(n-3)] sum[(x_i - mean)^4] / std^4 } - [3(n-1)^2 / (n-2)(n-3)]
+ * </p><p>
+ * where n is the number of values, mean is the {@link Mean} and std is the
+ * {@link StandardDeviation}</p>
+ * <p>
+ * Note that this statistic is undefined for n &lt; 4. <code>Double.NaN</code>
+ * is returned when there is not sufficient data to compute the statistic.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Kurtosis extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 2784465764798260919L;
+
+ /**Fourth Moment on which this statistic is based */
+ protected FourthMoment moment;
+
+ /**
+ * Determines whether or not this statistic can be incremented or cleared.
+ * <p>
+ * Statistics based on (constructed from) external moments cannot
+ * be incremented or cleared.</p>
+ */
+ protected boolean incMoment;
+
+ /**
+ * Creates a Kurtosis backed by its own, freshly created
+ * {@link FourthMoment}; the instance is flagged as incrementable.
+ */
+ public Kurtosis() {
+ this.moment = new FourthMoment();
+ this.incMoment = true;
+ }
+
+ /**
+ * Creates a Kurtosis that reads from an externally supplied moment.
+ * <p>The moment is stored as given (not copied) and the instance is
+ * flagged as not incrementable.</p>
+ *
+ * @param m4 external Moment
+ */
+ public Kurtosis(final FourthMoment m4) {
+ this.moment = m4;
+ this.incMoment = false;
+ }
+
+ /**
+ * Copy constructor: initializes the new {@code Kurtosis} by running the
+ * static {@code copy} with {@code original} as source and this instance
+ * as destination.
+ *
+ * @param original the {@code Kurtosis} instance to copy
+ */
+ public Kurtosis(Kurtosis original) {
+ Kurtosis.copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Only permitted when this instance owns its moment; an instance
+ * built from an external moment raises an illegal-state error instead.</p>
+ */
+ @Override
+ public void increment(final double d) {
+ if (!incMoment) {
+ throw MathRuntimeException.createIllegalStateException(
+ LocalizedFormats.CANNOT_INCREMENT_STATISTIC_CONSTRUCTED_FROM_EXTERNAL_MOMENTS);
+ }
+ moment.increment(d);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Yields <code>Double.NaN</code> while the moment reports three or
+ * fewer values, and <code>0.0</code> when the sample variance is below
+ * <code>10E-20</code>.</p>
+ */
+ @Override
+ public double getResult() {
+ if (moment.getN() <= 3) {
+ return Double.NaN;
+ }
+ final double variance = moment.m2 / (moment.n - 1);
+ if (moment.n <= 3 || variance < 10E-20) {
+ return 0.0;
+ }
+ final double n = moment.n;
+ final double numerator =
+ n * (n + 1) * moment.m4 - 3 * moment.m2 * moment.m2 * (n - 1);
+ final double denominator =
+ (n - 1) * (n -2) * (n -3) * variance * variance;
+ return numerator / denominator;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Only permitted when this instance owns its moment; an instance
+ * built from an external moment raises an illegal-state error instead.</p>
+ */
+ @Override
+ public void clear() {
+ if (!incMoment) {
+ throw MathRuntimeException.createIllegalStateException(
+ LocalizedFormats.CANNOT_CLEAR_STATISTIC_CONSTRUCTED_FROM_EXTERNAL_MOMENTS);
+ }
+ moment.clear();
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The count is read from the underlying moment.</p>
+ */
+ public long getN() {
+ return this.moment.getN();
+ }
+
+ /* UnivariateStatistic Approach */
+
+ /**
+ * Computes the kurtosis of <code>length</code> entries of
+ * <code>values</code>, starting at index <code>begin</code>.
+ * <p>
+ * See {@link Kurtosis} for details on the computing algorithm.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the kurtosis of the values or Double.NaN if length is less than
+ * 4
+ * @throws IllegalArgumentException if the input array is null or the array
+ * index parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values,final int begin, final int length) {
+ // Validate the range first; fewer than four values give no result
+ if (!test(values, begin, length) || length <= 3) {
+ return Double.NaN;
+ }
+
+ // Feed the range into a Variance to obtain the mean (its first
+ // moment) and the standard deviation
+ final Variance spread = new Variance();
+ spread.incrementAll(values, begin, length);
+ final double center = spread.moment.m1;
+ final double sigma = FastMath.sqrt(spread.getResult());
+
+ // Accumulate the fourth powers of the deviations from the mean,
+ // then scale the total by the fourth power of the standard deviation
+ final int end = begin + length;
+ double fourthPowerSum = 0.0;
+ for (int idx = begin; idx < end; idx++) {
+ fourthPowerSum += FastMath.pow(values[idx] - center, 4.0);
+ }
+ fourthPowerSum /= FastMath.pow(sigma, 4.0d);
+
+ // Sample size as a double for the closed-form coefficients
+ final double count = length;
+ final double leadingFactor =
+ (count * (count + 1)) / ((count - 1) * (count - 2) * (count - 3));
+ final double correction =
+ (3 * FastMath.pow(count - 1, 2.0)) / ((count - 2) * (count - 3));
+
+ return (leadingFactor * fourthPowerSum) - correction;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Builds a default instance and fills it through the static
+ * {@code copy}.</p>
+ */
+ @Override
+ public Kurtosis copy() {
+ final Kurtosis duplicate = new Kurtosis();
+ copy(this, duplicate);
+ return duplicate;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>The value of {@code source.getDataRef()} is handed to
+ * {@code dest.setData(...)}; dest then receives the result of
+ * {@code source.moment.copy()} and source's {@code incMoment} flag.</p>
+ *
+ * @param source Kurtosis to copy
+ * @param dest Kurtosis to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(Kurtosis source, Kurtosis dest) {
+ dest.setData(source.getDataRef());
+ dest.moment = source.moment.copy();
+ dest.incMoment = source.incMoment;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java
new file mode 100644
index 0000000..c5aa9da
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Mean.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.stat.descriptive.WeightedEvaluation;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+
+/**
+ * <p>Computes the arithmetic mean of a set of values. Uses the definitional
+ * formula:</p>
+ * <p>
+ * mean = sum(x_i) / n
+ * </p>
+ * <p>where <code>n</code> is the number of observations.
+ * </p>
+ * <p>When {@link #increment(double)} is used to add data incrementally from a
+ * stream of (unstored) values, the value of the statistic that
+ * {@link #getResult()} returns is computed using the following recursive
+ * updating algorithm: </p>
+ * <ol>
+ * <li>Initialize <code>m = </code> the first value</li>
+ * <li>For each additional value, update using <br>
+ * <code>m = m + (new value - m) / (number of observations)</code></li>
+ * </ol>
+ * <p> If {@link #evaluate(double[])} is used to compute the mean of an array
+ * of stored values, a two-pass, corrected algorithm is used, starting with
+ * the definitional formula computed using the array of stored values and then
+ * correcting this by adding the mean deviation of the data values from the
+ * arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing
+ * Sample Means and Variances," Robert F. Ling, Journal of the American
+ * Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866. </p>
+ * <p>
+ * Returns <code>Double.NaN</code> if the dataset is empty.
+ * </p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Mean extends AbstractStorelessUnivariateStatistic
+ implements Serializable, WeightedEvaluation {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -1296043746617791564L;
+
+ /** First moment on which this statistic is based. */
+ protected FirstMoment moment;
+
+ /**
+ * Determines whether or not this statistic can be incremented or cleared.
+ * <p>
+ * Statistics based on (constructed from) external moments cannot
+ * be incremented or cleared.</p>
+ */
+ protected boolean incMoment;
+
+ /**
+ * Constructs a Mean backed by its own, freshly created
+ * {@link FirstMoment}; the instance is flagged as incrementable.
+ */
+ public Mean() {
+ this.moment = new FirstMoment();
+ this.incMoment = true;
+ }
+
+ /**
+ * Constructs a Mean that reads from an externally supplied moment.
+ * <p>The moment is stored as given (not copied) and the instance is
+ * flagged as not incrementable.</p>
+ *
+ * @param m1 the moment
+ */
+ public Mean(final FirstMoment m1) {
+ this.incMoment = false;
+ this.moment = m1;
+ }
+
+ /**
+ * Copy constructor: initializes the new {@code Mean} by running the
+ * static {@code copy} with {@code original} as source and this instance
+ * as destination.
+ *
+ * @param original the {@code Mean} instance to copy
+ */
+ public Mean(Mean original) {
+ Mean.copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The value is forwarded to the underlying moment only when this
+ * instance owns it; a Mean built from an external moment silently
+ * ignores the call.</p>
+ */
+ @Override
+ public void increment(final double d) {
+ if (!incMoment) {
+ return;
+ }
+ moment.increment(d);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The underlying moment is cleared only when this instance owns it;
+ * a Mean built from an external moment silently ignores the call.</p>
+ */
+ @Override
+ public void clear() {
+ if (!incMoment) {
+ return;
+ }
+ moment.clear();
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The value is the <code>m1</code> field of the underlying moment.</p>
+ */
+ @Override
+ public double getResult() {
+ return this.moment.m1;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The count is read from the underlying moment.</p>
+ */
+ public long getN() {
+ return this.moment.getN();
+ }
+
+ /**
+ * Computes the arithmetic mean of <code>length</code> entries of
+ * <code>values</code> starting at index <code>begin</code>, or
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link Mean} for details on the computing algorithm.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the mean of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values,final int begin, final int length) {
+ if (!test(values, begin, length)) {
+ return Double.NaN;
+ }
+ final Sum total = new Sum();
+ final double sampleSize = length;
+
+ // First pass: definitional estimate, sum / n
+ final double xbar = total.evaluate(values, begin, length) / sampleSize;
+
+ // Second pass: accumulate the residuals about the first estimate
+ final int end = begin + length;
+ double residualSum = 0;
+ for (int idx = begin; idx < end; idx++) {
+ residualSum += values[idx] - xbar;
+ }
+ return xbar + (residualSum / sampleSize);
+ }
+
+ /**
+ * Computes the weighted arithmetic mean of <code>length</code> entries of
+ * <code>values</code> starting at index <code>begin</code>, or
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if either array is null.</p>
+ * <p>
+ * See {@link Mean} for details on the computing algorithm. The two-pass algorithm
+ * described there is used here, with weights applied in computing both the original
+ * estimate and the correction factor.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * <li>the start and length arguments do not determine a valid array</li>
+ * </ul></p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the mean of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights,
+ final int begin, final int length) {
+ if (!test(values, weights, begin, length)) {
+ return Double.NaN;
+ }
+ final Sum total = new Sum();
+
+ // First pass: weighted definitional estimate, sum(w * x) / sum(w)
+ final double weightSum = total.evaluate(weights,begin,length);
+ final double weightedMean =
+ total.evaluate(values, weights, begin, length) / weightSum;
+
+ // Second pass: accumulate the weighted residuals about the estimate
+ final int end = begin + length;
+ double residualSum = 0;
+ for (int idx = begin; idx < end; idx++) {
+ residualSum += weights[idx] * (values[idx] - weightedMean);
+ }
+ return weightedMean + (residualSum / weightSum);
+ }
+
+ /**
+ * Computes the weighted arithmetic mean over the whole input array by
+ * delegating to the ranged overload with the range
+ * <code>[0, values.length)</code>.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if either array is null.</p>
+ * <p>
+ * See {@link Mean} for details on the computing algorithm. The two-pass algorithm
+ * described there is used here, with weights applied in computing both the original
+ * estimate and the correction factor.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * </ul></p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @return the mean of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights) {
+ // NOTE(review): values.length is read before any validation runs, so a
+ // null values array surfaces here as a NullPointerException
+ final int wholeLength = values.length;
+ return evaluate(values, weights, 0, wholeLength);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Builds a default instance and fills it through the static
+ * {@code copy}.</p>
+ */
+ @Override
+ public Mean copy() {
+ final Mean duplicate = new Mean();
+ copy(this, duplicate);
+ return duplicate;
+ }
+
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>The value of {@code source.getDataRef()} is handed to
+ * {@code dest.setData(...)}; dest then receives source's
+ * {@code incMoment} flag and the result of {@code source.moment.copy()}.</p>
+ *
+ * @param source Mean to copy
+ * @param dest Mean to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(Mean source, Mean dest) {
+ dest.setData(source.getDataRef());
+ dest.incMoment = source.incMoment;
+ dest.moment = source.moment.copy();
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/SecondMoment.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/SecondMoment.java
new file mode 100644
index 0000000..ae8ef8e
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/SecondMoment.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+/**
+ * Computes a statistic related to the Second Central Moment. Specifically,
+ * what is computed is the sum of squared deviations from the sample mean.
+ * <p>
+ * The following recursive updating formula is used:</p>
+ * <p>
+ * Let <ul>
+ * <li> dev = (current obs - previous mean) </li>
+ * <li> n = number of observations (including current obs) </li>
+ * </ul>
+ * Then</p>
+ * <p>
+ * new value = old value + dev^2 * (n -1) / n.</p>
+ * <p>
+ * Returns <code>Double.NaN</code> if no data values have been added and
+ * returns <code>0</code> if there is just one value in the data set.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class SecondMoment extends FirstMoment implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 3942403127395076445L;
+
+ /** second moment of values that have been added */
+ protected double m2;
+
+ /**
+ * Creates an empty SecondMoment; <code>m2</code> starts out as
+ * <code>Double.NaN</code>.
+ */
+ public SecondMoment() {
+ super();
+ this.m2 = Double.NaN;
+ }
+
+ /**
+ * Copy constructor: the superclass copy constructor is run on
+ * {@code original}, then {@code original}'s <code>m2</code> is taken over.
+ *
+ * @param original the {@code SecondMoment} instance to copy
+ */
+ public SecondMoment(SecondMoment original) {
+ super(original);
+ m2 = original.m2;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Before the first value is processed (n &lt; 1) both m1 and m2 are set
+ * to 0.0, replacing the NaN that m2 holds after construction or clear.
+ * The superclass update runs next, and m2 is then advanced by
+ * <code>(n - 1) * dev * nDev</code>.</p>
+ */
+ @Override
+ public void increment(final double d) {
+ if (n < 1) {
+ m1 = m2 = 0.0;
+ }
+ // NOTE(review): n, dev and nDev are inherited fields, presumably
+ // refreshed by FirstMoment.increment(d) - confirm against FirstMoment
+ super.increment(d);
+ m2 += ((double) n - 1) * dev * nDev;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>After the superclass state is cleared, <code>m2</code> goes back to
+ * <code>Double.NaN</code>.</p>
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ this.m2 = Double.NaN;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>The value is the current <code>m2</code> field.</p>
+ */
+ @Override
+ public double getResult() {
+ return this.m2;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>Builds a default instance and fills it through the static
+ * {@code copy}.</p>
+ */
+ @Override
+ public SecondMoment copy() {
+ final SecondMoment duplicate = new SecondMoment();
+ copy(this, duplicate);
+ return duplicate;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>The inherited state is transferred by {@code FirstMoment.copy};
+ * afterwards dest takes over source's <code>m2</code>.</p>
+ *
+ * @param source SecondMoment to copy
+ * @param dest SecondMoment to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SecondMoment source, SecondMoment dest) {
+ FirstMoment.copy(source, dest);
+ dest.m2 = source.m2;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/SemiVariance.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/SemiVariance.java
new file mode 100644
index 0000000..04aa456
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/SemiVariance.java
@@ -0,0 +1,379 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
+
+/**
+ * <p>Computes the semivariance of a set of values with respect to a given cutoff value.
+ * We define the <i>downside semivariance</i> of a set of values <code>x</code>
+ * against the <i>cutoff value</i> <code>cutoff</code> to be <br/>
+ * <code>&Sigma; (x[i] - cutoff)<sup>2</sup> / df</code> <br/>
+ * where the sum is taken over all <code>i</code> such that <code>x[i] &lt; cutoff</code>
+ * and <code>df</code> is the length of <code>x</code> (non-bias-corrected) or
+ * one less than this number (bias corrected). The <i>upside semivariance</i>
+ * is defined similarly, with the sum taken over values of <code>x</code> that
+ * exceed the cutoff value.</p>
+ *
+ * <p>The cutoff value defaults to the mean, bias correction defaults to <code>true</code>
+ * and the "variance direction" (upside or downside) defaults to downside. The variance direction
+ * and bias correction may be set using property setters or their values can be provided as
+ * parameters to {@link #evaluate(double[], double, Direction, boolean, int, int)}.</p>
+ *
+ * <p>If the input array is null, <code>evaluate</code> methods throw
+ * <code>IllegalArgumentException.</code> If the array has length 1, <code>0</code>
+ * is returned, regardless of the value of the <code>cutoff.</code>
+ *
+ * <p><strong>Note that this class is not intended to be threadsafe.</strong> If
+ * multiple threads access an instance of this class concurrently, and one or
+ * more of these threads invoke property setters, external synchronization must
+ * be provided to ensure correct results.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ * @since 2.1
+ */
+
+public class SemiVariance extends AbstractUnivariateStatistic implements Serializable {
+
+ /**
+ * The UPSIDE Direction is used to specify that the observations above the
+ * cutoff point will be used to calculate SemiVariance.
+ */
+ public static final Direction UPSIDE_VARIANCE = Direction.UPSIDE;
+
+ /**
+ * The DOWNSIDE Direction is used to specify that the observations below
+ * the cutoff point will be used to calculate SemiVariance
+ */
+ public static final Direction DOWNSIDE_VARIANCE = Direction.DOWNSIDE;
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -2653430366886024994L;
+
+ /**
+ * Determines whether or not bias correction is applied when computing the
+ * value of the statistic. True means that bias is corrected.
+ */
+ private boolean biasCorrected = true;
+
+ /**
+ * Determines whether to calculate downside or upside SemiVariance.
+ */
+ private Direction varianceDirection = Direction.DOWNSIDE;
+
+ /**
+ * Creates a SemiVariance that keeps the defaults: <code>biasCorrected</code>
+ * is true and <code>varianceDirection</code> is Downside.
+ */
+ public SemiVariance() {
+ // nothing to do: both properties keep their field initializer values
+ }
+
+ /**
+ * Creates a SemiVariance with the given <code>biasCorrected</code>
+ * setting; <code>varianceDirection</code> keeps its default (Downside).
+ *
+ * @param biasCorrected setting for bias correction - true means
+ * bias will be corrected and is equivalent to using the argumentless
+ * constructor
+ */
+ public SemiVariance(final boolean biasCorrected) {
+ setBiasCorrected(biasCorrected);
+ }
+
+
+ /**
+ * Creates a SemiVariance with the given <code>Direction</code>;
+ * <code>biasCorrected</code> keeps its default (true).
+ *
+ * @param direction setting for the direction of the SemiVariance
+ * to calculate
+ */
+ public SemiVariance(final Direction direction) {
+ varianceDirection = direction;
+ }
+
+
+ /**
+ * Creates a SemiVariance with both the bias-correction setting and the
+ * <code>Direction</code> given explicitly.
+ *
+ * @param corrected setting for bias correction - true means
+ * bias will be corrected and is equivalent to using the argumentless
+ * constructor
+ *
+ * @param direction setting for the direction of the SemiVariance
+ * to calculate
+ */
+ public SemiVariance(final boolean corrected, final Direction direction) {
+ varianceDirection = direction;
+ biasCorrected = corrected;
+ }
+
+
+ /**
+ * Copy constructor: initializes the new {@code SemiVariance} by running
+ * the static {@code copy} with {@code original} as source and this
+ * instance as destination.
+ *
+ * @param original the {@code SemiVariance} instance to copy
+ */
+ public SemiVariance(final SemiVariance original) {
+ SemiVariance.copy(original, this);
+ }
+
+
+ /**
+ * {@inheritDoc}
+ * <p>Builds a default instance and fills it through the static
+ * {@code copy}.</p>
+ */
+ @Override
+ public SemiVariance copy() {
+ final SemiVariance duplicate = new SemiVariance();
+ copy(this, duplicate);
+ return duplicate;
+ }
+
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ * <p>The value of {@code source.getDataRef()} is handed to
+ * {@code dest.setData(...)}; dest then takes over source's
+ * {@code biasCorrected} and {@code varianceDirection} settings.</p>
+ *
+ * @param source SemiVariance to copy
+ * @param dest SemiVariance to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(final SemiVariance source, SemiVariance dest) {
+ dest.setData(source.getDataRef());
+ dest.biasCorrected = source.biasCorrected;
+ dest.varianceDirection = source.varianceDirection;
+ }
+
+
+ /**
+ * Calculates the {@link SemiVariance} of the entire array against the mean, using
+ * the instance properties varianceDirection and biasCorrection. The work is
+ * delegated to the ranged overload with the range <code>[0, values.length)</code>.
+ *
+ * @param values the input array
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if values is null
+ *
+ */
+ @Override
+ public double evaluate(final double[] values) {
+ if (values != null) {
+ return evaluate(values, 0, values.length);
+ }
+ throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+
+
+ /**
+ * <p>Returns the {@link SemiVariance} of the designated values against the mean, using
+ * instance properties varianceDirection and biasCorrection.</p>
+ *
+ * <p>Both the mean used as cutoff and the semivariance itself are computed
+ * over the same designated range, i.e. the <code>length</code> entries
+ * starting at index <code>start</code>.</p>
+ *
+ * <p>Returns <code>NaN</code> if the array is empty and throws
+ * <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param start index of the first array element to include
+ * @param length the number of elements to include
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if the parameters are not valid
+ *
+ */
+ @Override
+ public double evaluate(final double[] values, final int start, final int length) {
+ double m = (new Mean()).evaluate(values, start, length);
+ // Pass the caller's range through; delegating with (0, values.length)
+ // would measure the whole array against the mean of the sub-range
+ return evaluate(values, m, varianceDirection, biasCorrected, start, length);
+ }
+
+
+ /**
+ * Calculates the {@link SemiVariance} of the entire array against the mean in the
+ * given direction, using the current value of the biasCorrection instance property.
+ *
+ * @param values the input array
+ * @param direction the {@link Direction} of the semivariance
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if values is null
+ *
+ */
+ public double evaluate(final double[] values, Direction direction) {
+ final double mean = new Mean().evaluate(values);
+ return evaluate(values, mean, direction, biasCorrected, 0, values.length);
+ }
+
+ /**
+ * <p>Returns the {@link SemiVariance} of the designated values against the cutoff, using
+ * instance properties varianceDirection and biasCorrection.</p>
+ *
+ * <p>Returns <code>NaN</code> if the array is empty and throws
+ * <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param cutoff the reference point
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if values is null
+ */
+ public double evaluate(final double[] values, final double cutoff) {
+ // Reject null up front, as evaluate(double[]) does; otherwise reading
+ // values.length below would fail with a NullPointerException
+ if (values == null) {
+ throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+ return evaluate(values, cutoff, varianceDirection, biasCorrected, 0, values.length);
+ }
+
+ /**
+ * <p>Returns the {@link SemiVariance} of the designated values against the cutoff in the
+ * given direction, using the current value of the biasCorrection instance property.</p>
+ *
+ * <p>Returns <code>NaN</code> if the array is empty and throws
+ * <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param cutoff the reference point
+ * @param direction the {@link Direction} of the semivariance
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if values is null
+ */
+ public double evaluate(final double[] values, final double cutoff, final Direction direction) {
+ // Reject null up front, as evaluate(double[]) does; otherwise reading
+ // values.length below would fail with a NullPointerException
+ if (values == null) {
+ throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+ return evaluate(values, cutoff, direction, biasCorrected, 0, values.length);
+ }
+
+
+ /**
+ * <p>Returns the {@link SemiVariance} of the designated values against the cutoff
+ * in the given direction with the provided bias correction.</p>
+ *
+ * <p>Only the <code>length</code> entries starting at index <code>start</code>
+ * contribute. Returns <code>NaN</code> if that range is empty, <code>0</code> if
+ * it holds a single value, and throws <code>IllegalArgumentException</code> if
+ * the array is null.</p>
+ *
+ * @param values the input array
+ * @param cutoff the reference point
+ * @param direction the {@link Direction} of the semivariance
+ * @param corrected the BiasCorrection flag
+ * @param start index of the first array element to include
+ * @param length the number of elements to include
+ * @return the SemiVariance
+ * @throws IllegalArgumentException if the parameters are not valid
+ *
+ */
+ public double evaluate (final double[] values, final double cutoff, final Direction direction,
+ final boolean corrected, final int start, final int length) {
+
+ test(values, start, length);
+ // Decide on the size of the designated range, not of the backing array,
+ // so a one-element range never reaches the division by (length - 1)
+ if (length == 0) {
+ return Double.NaN;
+ }
+ if (length == 1) {
+ return 0.0;
+ }
+
+ final boolean booleanDirection = direction.getDirection();
+
+ // Walk exactly the designated range [start, start + length)
+ final int end = start + length;
+ double sumsq = 0.0;
+ for (int i = start; i < end; i++) {
+ if ((values[i] > cutoff) == booleanDirection) {
+ final double dev = values[i] - cutoff;
+ sumsq += dev * dev;
+ }
+ }
+
+ // Divisor is length - 1 with bias correction, length otherwise
+ if (corrected) {
+ return sumsq / (length - 1.0);
+ }
+ return sumsq / length;
+ }
+
+ /**
+ * Reports whether bias correction is currently switched on.
+ *
+ * @return the value of biasCorrected.
+ */
+ public boolean isBiasCorrected() {
+ return this.biasCorrected;
+ }
+
+ /**
+ * Switches bias correction on or off for subsequent evaluations that
+ * rely on the instance property.
+ *
+ * @param biasCorrected new biasCorrected property value
+ */
+ public void setBiasCorrected(boolean biasCorrected) {
+ this.biasCorrected = biasCorrected;
+ }
+
+ /**
+ * Reports the direction currently configured on this instance.
+ *
+ * @return the varianceDirection
+ */
+ public Direction getVarianceDirection () {
+ return this.varianceDirection;
+ }
+
+ /**
+ * Changes the direction used by subsequent evaluations that rely on
+ * the instance property.
+ *
+ * @param varianceDirection the direction of the semivariance
+ */
+ public void setVarianceDirection(Direction varianceDirection) {
+ this.varianceDirection = varianceDirection;
+ }
+
+ /**
+ * The direction of the semivariance - either upside or downside. The direction
+ * is represented by boolean, with true corresponding to UPSIDE semivariance.
+ */
+ public enum Direction {
+ /**
+ * The UPSIDE Direction is used to specify that the observations above the
+ * cutoff point will be used to calculate SemiVariance
+ */
+ UPSIDE (true),
+
+ /**
+ * The DOWNSIDE Direction is used to specify that the observations below
+ * the cutoff point will be used to calculate SemiVariance
+ */
+ DOWNSIDE (false);
+
+ /**
+ * boolean value UPSIDE &lt;-&gt; true; assigned once in the constructor
+ * and never changed afterwards
+ */
+ private final boolean direction;
+
+ /**
+ * Create a Direction with the given value.
+ *
+ * @param b boolean value representing the Direction. True corresponds to UPSIDE.
+ */
+ Direction (boolean b) {
+ direction = b;
+ }
+
+ /**
+ * Returns the value of this Direction. True corresponds to UPSIDE.
+ *
+ * @return true if direction is UPSIDE; false otherwise
+ */
+ boolean getDirection () {
+ return direction;
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Skewness.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Skewness.java
new file mode 100644
index 0000000..d16f956
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Skewness.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Computes the skewness of the available values.
+ * <p>
+ * We use the following (unbiased) formula to define skewness:</p>
+ * <p>
+ * skewness = [n / ((n - 1) (n - 2))] sum[(x_i - mean)^3] / std^3 </p>
+ * <p>
+ * where n is the number of values, mean is the {@link Mean} and std is the
+ * {@link StandardDeviation} </p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally. </p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Skewness extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 7101857578996691352L;
+
+ /** Third moment on which this statistic is based */
+ protected ThirdMoment moment = null;
+
+ /**
+ * Determines whether or not this statistic can be incremented or cleared.
+ * <p>
+ * Statistics based on (constructed from) external moments cannot
+ * be incremented or cleared.</p>
+ */
+ protected boolean incMoment;
+
+ /**
+ * Constructs a Skewness
+ */
+ public Skewness() {
+ incMoment = true;
+ moment = new ThirdMoment();
+ }
+
+ /**
+ * Constructs a Skewness with an external moment
+ * @param m3 external moment
+ */
+ public Skewness(final ThirdMoment m3) {
+ incMoment = false;
+ this.moment = m3;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code Skewness} identical
+ * to the {@code original}
+ *
+ * @param original the {@code Skewness} instance to copy
+ */
+ public Skewness(Skewness original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ if (incMoment) {
+ moment.increment(d);
+ }
+ }
+
+ /**
+ * Returns the value of the statistic based on the values that have been added.
+ * <p>
+ * See {@link Skewness} for the definition used in the computation.</p>
+ *
+ * @return the skewness of the available values.
+ */
+ @Override
+ public double getResult() {
+
+ if (moment.n < 3) {
+ return Double.NaN;
+ }
+ double variance = moment.m2 / (moment.n - 1);
+ if (variance < 10E-20) {
+ return 0.0d;
+ } else {
+ double n0 = moment.getN();
+ return (n0 * moment.m3) /
+ ((n0 - 1) * (n0 -2) * FastMath.sqrt(variance) * variance);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return moment.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ if (incMoment) {
+ moment.clear();
+ }
+ }
+
+ /**
+ * Returns the Skewness of the entries in the specified portion of the
+ * input array.
+ * <p>
+ * See {@link Skewness} for the definition used in the computation.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin the index of the first array element to include
+ * @param length the number of elements to include
+ * @return the skewness of the values or Double.NaN if length is less than
+ * 3
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values,final int begin,
+ final int length) {
+
+ // Initialize the skewness
+ double skew = Double.NaN;
+
+ if (test(values, begin, length) && length > 2 ){
+ Mean mean = new Mean();
+ // Get the mean and the standard deviation
+ double m = mean.evaluate(values, begin, length);
+
+ // Calc the std; this is implemented here instead
+ // of using the standardDeviation method to eliminate
+ // a duplicate pass to get the mean
+ double accum = 0.0;
+ double accum2 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ final double d = values[i] - m;
+ accum += d * d;
+ accum2 += d;
+ }
+ final double variance = (accum - (accum2 * accum2 / length)) / (length - 1);
+
+ double accum3 = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ final double d = values[i] - m;
+ accum3 += d * d * d;
+ }
+ accum3 /= variance * FastMath.sqrt(variance);
+
+ // Get N
+ double n0 = length;
+
+ // Calculate skewness
+ skew = (n0 / ((n0 - 1) * (n0 - 2))) * accum3;
+ }
+ return skew;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public Skewness copy() {
+ Skewness result = new Skewness();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source Skewness to copy
+ * @param dest Skewness to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(Skewness source, Skewness dest) {
+ dest.setData(source.getDataRef());
+ dest.moment = new ThirdMoment(source.moment.copy());
+ dest.incMoment = source.incMoment;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/StandardDeviation.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/StandardDeviation.java
new file mode 100644
index 0000000..837ae3b
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/StandardDeviation.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Computes the sample standard deviation. The standard deviation
+ * is the positive square root of the variance. This implementation wraps a
+ * {@link Variance} instance. The <code>isBiasCorrected</code> property of the
+ * wrapped Variance instance is exposed, so that this class can be used to
+ * compute both the "sample standard deviation" (the square root of the
+ * bias-corrected "sample variance") or the "population standard deviation"
+ * (the square root of the non-bias-corrected "population variance"). See
+ * {@link Variance} for more information.
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class StandardDeviation extends AbstractStorelessUnivariateStatistic
+ implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 5728716329662425188L;
+
+ /** Wrapped Variance instance */
+ private Variance variance = null;
+
+ /**
+ * Constructs a StandardDeviation. Sets the underlying {@link Variance}
+ * instance's <code>isBiasCorrected</code> property to true.
+ */
+ public StandardDeviation() {
+ variance = new Variance();
+ }
+
+ /**
+ * Constructs a StandardDeviation from an external second moment.
+ *
+ * @param m2 the external moment
+ */
+ public StandardDeviation(final SecondMoment m2) {
+ variance = new Variance(m2);
+ }
+
+ /**
+ * Copy constructor, creates a new {@code StandardDeviation} identical
+ * to the {@code original}
+ *
+ * @param original the {@code StandardDeviation} instance to copy
+ */
+ public StandardDeviation(StandardDeviation original) {
+ copy(original, this);
+ }
+
+ /**
+ * Constructs a StandardDeviation with the specified value for the
+ * <code>isBiasCorrected</code> property. If this property is set to
+ * <code>true</code>, the {@link Variance} used in computing results will
+ * use the bias-corrected, or "sample" formula. See {@link Variance} for
+ * details.
+ *
+ * @param isBiasCorrected whether or not the variance computation will use
+ * the bias-corrected formula
+ */
+ public StandardDeviation(boolean isBiasCorrected) {
+ variance = new Variance(isBiasCorrected);
+ }
+
+ /**
+ * Constructs a StandardDeviation with the specified value for the
+ * <code>isBiasCorrected</code> property and the supplied external moment.
+ * If <code>isBiasCorrected</code> is set to <code>true</code>, the
+ * {@link Variance} used in computing results will use the bias-corrected,
+ * or "sample" formula. See {@link Variance} for details.
+ *
+ * @param isBiasCorrected whether or not the variance computation will use
+ * the bias-corrected formula
+ * @param m2 the external moment
+ */
+ public StandardDeviation(boolean isBiasCorrected, SecondMoment m2) {
+ variance = new Variance(isBiasCorrected, m2);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ variance.increment(d);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return variance.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ return FastMath.sqrt(variance.getResult());
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ variance.clear();
+ }
+
+ /**
+ * Returns the Standard Deviation of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @return the standard deviation of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null
+ */
+ @Override
+ public double evaluate(final double[] values) {
+ return FastMath.sqrt(variance.evaluate(values));
+ }
+
+ /**
+ * Returns the Standard Deviation of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample. </p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the standard deviation of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values, final int begin, final int length) {
+ return FastMath.sqrt(variance.evaluate(values, begin, length));
+ }
+
+ /**
+ * Returns the Standard Deviation of the entries in the specified portion of
+ * the input array, using the precomputed mean value. Returns
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the standard deviation of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public double evaluate(final double[] values, final double mean,
+ final int begin, final int length) {
+ return FastMath.sqrt(variance.evaluate(values, mean, begin, length));
+ }
+
+ /**
+ * Returns the Standard Deviation of the entries in the input array, using
+ * the precomputed mean value. Returns
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @return the standard deviation of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null
+ */
+ public double evaluate(final double[] values, final double mean) {
+ return FastMath.sqrt(variance.evaluate(values, mean));
+ }
+
+ /**
+ * @return the isBiasCorrected property of the wrapped Variance instance.
+ */
+ public boolean isBiasCorrected() {
+ return variance.isBiasCorrected();
+ }
+
+ /**
+ * @param isBiasCorrected The isBiasCorrected to set.
+ */
+ public void setBiasCorrected(boolean isBiasCorrected) {
+ variance.setBiasCorrected(isBiasCorrected);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public StandardDeviation copy() {
+ StandardDeviation result = new StandardDeviation();
+ copy(this, result);
+ return result;
+ }
+
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source StandardDeviation to copy
+ * @param dest StandardDeviation to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(StandardDeviation source, StandardDeviation dest) {
+ dest.setData(source.getDataRef());
+ dest.variance = source.variance.copy();
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/ThirdMoment.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/ThirdMoment.java
new file mode 100644
index 0000000..5c50989
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/ThirdMoment.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+
+/**
+ * Computes a statistic related to the Third Central Moment. Specifically,
+ * what is computed is the sum of cubed deviations from the sample mean.
+ * <p>
+ * The following recursive updating formula is used:</p>
+ * <p>
+ * Let <ul>
+ * <li> dev = (current obs - previous mean) </li>
+ * <li> m2 = previous value of {@link SecondMoment} </li>
+ * <li> n = number of observations (including current obs) </li>
+ * </ul>
+ * Then</p>
+ * <p>
+ * new value = old value - 3 * (dev/n) * m2 + (n-1) * (n -2) * (dev^3/n^2)</p>
+ * <p>
+ * Returns <code>Double.NaN</code> if no data values have been added and
+ * returns <code>0</code> if there is just one value in the data set.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class ThirdMoment extends SecondMoment implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -7818711964045118679L;
+
+ /** third moment of values that have been added */
+ protected double m3;
+
+ /**
+ * Square of deviation of most recently added value from previous first
+ * moment, normalized by previous sample size. Retained to prevent
+ * repeated computation in higher order moments. nDevSq = nDev * nDev.
+ */
+ protected double nDevSq;
+
+ /**
+ * Create a ThirdMoment instance
+ */
+ public ThirdMoment() {
+ super();
+ m3 = Double.NaN;
+ nDevSq = Double.NaN;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code ThirdMoment} identical
+ * to the {@code original}
+ *
+ * @param original the {@code ThirdMoment} instance to copy
+ */
+ public ThirdMoment(ThirdMoment original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ if (n < 1) {
+ m3 = m2 = m1 = 0.0;
+ }
+
+ double prevM2 = m2;
+ super.increment(d);
+ nDevSq = nDev * nDev;
+ double n0 = n;
+ m3 = m3 - 3.0 * nDev * prevM2 + (n0 - 1) * (n0 - 2) * nDevSq * dev;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ return m3;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ m3 = Double.NaN;
+ nDevSq = Double.NaN;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public ThirdMoment copy() {
+ ThirdMoment result = new ThirdMoment();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source ThirdMoment to copy
+ * @param dest ThirdMoment to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(ThirdMoment source, ThirdMoment dest) {
+ SecondMoment.copy(source, dest);
+ dest.m3 = source.m3;
+ dest.nDevSq = source.nDevSq;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java
new file mode 100644
index 0000000..6ce6835
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/Variance.java
@@ -0,0 +1,610 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.exception.NullArgumentException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.WeightedEvaluation;
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+/**
+ * Computes the variance of the available values. By default, the unbiased
+ * "sample variance" definitional formula is used:
+ * <p>
+ * variance = sum((x_i - mean)^2) / (n - 1) </p>
+ * <p>
+ * where mean is the {@link Mean} and <code>n</code> is the number
+ * of sample observations.</p>
+ * <p>
+ * The definitional formula does not have good numerical properties, so
+ * this implementation does not compute the statistic using the definitional
+ * formula. <ul>
+ * <li> The <code>getResult</code> method computes the variance using
+ * updating formulas based on West's algorithm, as described in
+ * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
+ * J. G. Lewis 1979, <i>Communications of the ACM</i>,
+ * vol. 22 no. 9, pp. 526-531.</a></li>
+ * <li> The <code>evaluate</code> methods leverage the fact that they have the
+ * full array of values in memory to execute a two-pass algorithm.
+ * Specifically, these methods use the "corrected two-pass algorithm" from
+ * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
+ * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul>
+ * Note that adding values using <code>increment</code> or
+ * <code>incrementAll</code> and then executing <code>getResult</code> will
+ * sometimes give a different, less accurate, result than executing
+ * <code>evaluate</code> with the full array of values. The former approach
+ * should only be used when the full array of values is not available.</p>
+ * <p>
+ * The "population variance" ( sum((x_i - mean)^2) / n ) can also
+ * be computed using this statistic. The <code>isBiasCorrected</code>
+ * property determines whether the "population" or "sample" value is
+ * returned by the <code>evaluate</code> and <code>getResult</code> methods.
+ * To compute population variances, set this property to <code>false.</code>
+ * </p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -9111962718267217978L;
+
+ /** SecondMoment is used in incremental calculation of Variance*/
+ protected SecondMoment moment = null;
+
+ /**
+ * Whether this Variance should also increment the second moment.
+ * This is false when this Variance is constructed with an external
+ * SecondMoment as a parameter.
+ */
+ protected boolean incMoment = true;
+
+ /**
+ * Determines whether or not bias correction is applied when computing the
+ * value of the statistic. True means that bias is corrected. See
+ * {@link Variance} for details on the formula.
+ */
+ private boolean isBiasCorrected = true;
+
+ /**
+ * Constructs a Variance with default (true) <code>isBiasCorrected</code>
+ * property.
+ */
+ public Variance() {
+ moment = new SecondMoment();
+ }
+
+ /**
+ * Constructs a Variance based on an external second moment.
+ *
+ * @param m2 the SecondMoment (Third or Fourth moments work
+ * here as well.)
+ */
+ public Variance(final SecondMoment m2) {
+ incMoment = false;
+ this.moment = m2;
+ }
+
+ /**
+ * Constructs a Variance with the specified <code>isBiasCorrected</code>
+ * property
+ *
+ * @param isBiasCorrected setting for bias correction - true means
+ * bias will be corrected and is equivalent to using the argumentless
+ * constructor
+ */
+ public Variance(boolean isBiasCorrected) {
+ moment = new SecondMoment();
+ this.isBiasCorrected = isBiasCorrected;
+ }
+
+ /**
+ * Constructs a Variance with the specified <code>isBiasCorrected</code>
+ * property and the supplied external second moment.
+ *
+ * @param isBiasCorrected setting for bias correction - true means
+ * bias will be corrected
+ * @param m2 the SecondMoment (Third or Fourth moments work
+ * here as well.)
+ */
+ public Variance(boolean isBiasCorrected, SecondMoment m2) {
+ incMoment = false;
+ this.moment = m2;
+ this.isBiasCorrected = isBiasCorrected;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code Variance} identical
+ * to the {@code original}
+ *
+ * @param original the {@code Variance} instance to copy
+ */
+ public Variance(Variance original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>If all values are available, it is more accurate to use
+ * {@link #evaluate(double[])} rather than adding values one at a time
+ * using this method and then executing {@link #getResult}, since
+ * <code>evaluate</code> leverages the fact that it has the full
+ * list of values together to execute a two-pass algorithm.
+ * See {@link Variance}.</p>
+ */
+ @Override
+ public void increment(final double d) {
+ if (incMoment) {
+ moment.increment(d);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ if (moment.n == 0) {
+ return Double.NaN;
+ } else if (moment.n == 1) {
+ return 0d;
+ } else {
+ if (isBiasCorrected) {
+ return moment.m2 / (moment.n - 1d);
+ } else {
+ return moment.m2 / (moment.n);
+ }
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return moment.getN();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ if (incMoment) {
+ moment.clear();
+ }
+ }
+
+ /**
+ * Returns the variance of the entries in the input array, or
+ * <code>Double.NaN</code> if the array is empty.
+ * <p>
+ * See {@link Variance} for details on the computing algorithm.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null
+ */
+ @Override
+ public double evaluate(final double[] values) {
+ if (values == null) {
+ throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
+ }
+ return evaluate(values, 0, values.length);
+ }
+
+ /**
+ * Returns the variance of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * See {@link Variance} for details on the computing algorithm.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values, final int begin, final int length) {
+
+ double var = Double.NaN;
+
+ if (test(values, begin, length)) {
+ clear();
+ if (length == 1) {
+ var = 0.0;
+ } else if (length > 1) {
+ Mean mean = new Mean();
+ double m = mean.evaluate(values, begin, length);
+ var = evaluate(values, m, begin, length);
+ }
+ }
+ return var;
+ }
+
+ /**
+ * <p>Returns the weighted variance of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.</p>
+ * <p>
+ * Uses the formula <pre>
+ * &Sigma;(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(&Sigma;(weights[i]) - 1)
+ * </pre>
+ * where weightedMean is the weighted mean</p>
+ * <p>
+ * This formula will not return the same result as the unweighted variance when all
+ * weights are equal, unless all weights are equal to 1. The formula assumes that
+ * weights are to be treated as "expansion values," as will be the case if for example
+ * the weights represent frequency counts. To normalize weights so that the denominator
+ * in the variance computation equals the length of the input vector minus one, use <pre>
+ * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code>
+ * </pre>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * <li>the start and length arguments do not determine a valid array</li>
+ * </ul></p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if either array is null.</p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the weighted variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights,
+ final int begin, final int length) {
+
+ double var = Double.NaN;
+
+ if (test(values, weights,begin, length)) {
+ clear();
+ if (length == 1) {
+ var = 0.0;
+ } else if (length > 1) {
+ Mean mean = new Mean();
+ double m = mean.evaluate(values, weights, begin, length);
+ var = evaluate(values, weights, m, begin, length);
+ }
+ }
+ return var;
+ }
+
+ /**
+ * <p>
+ * Returns the weighted variance of the entries in the input array.</p>
+ * <p>
+ * Uses the formula <pre>
+ * &Sigma;(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(&Sigma;(weights[i]) - 1)
+ * </pre>
+ * where weightedMean is the weighted mean</p>
+ * <p>
+ * This formula will not return the same result as the unweighted variance when all
+ * weights are equal, unless all weights are equal to 1. The formula assumes that
+ * weights are to be treated as "expansion values," as will be the case if for example
+ * the weights represent frequency counts. To normalize weights so that the denominator
+ * in the variance computation equals the length of the input vector minus one, use <pre>
+ * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code>
+ * </pre>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * </ul></p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if either array is null.</p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @return the weighted variance of the values
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights) {
+     // Whole-array convenience overload: start at index 0 and cover every entry.
+     final int count = values.length;
+     return evaluate(values, weights, 0, count);
+ }
+
+ /**
+ * Returns the variance of the entries in the specified portion of
+ * the input array, using the precomputed mean value. Returns
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * See {@link Variance} for details on the computing algorithm.</p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ public double evaluate(final double[] values, final double mean,
+         final int begin, final int length) {
+
+     // test(...) is defined outside this excerpt; a false result means NaN.
+     if (!test(values, begin, length)) {
+         return Double.NaN;
+     }
+     if (length == 1) {
+         // a single observation has no spread
+         return 0.0;
+     }
+     if (length <= 1) {
+         return Double.NaN;
+     }
+
+     double sumSquaredDev = 0.0; // sum of (x - mean)^2
+     double sumDev = 0.0;        // sum of (x - mean); zero when mean is exact
+     final int end = begin + length;
+     for (int idx = begin; idx < end; idx++) {
+         final double deviation = values[idx] - mean;
+         sumSquaredDev += deviation * deviation;
+         sumDev += deviation;
+     }
+
+     final double count = length;
+     // The subtracted term compensates for a supplied mean that is not exactly
+     // the arithmetic mean of the window.
+     final double corrected = sumSquaredDev - (sumDev * sumDev / count);
+     // n - 1 for the bias-corrected (sample) variance, n for the population one
+     final double divisor = isBiasCorrected ? (count - 1.0) : count;
+     return corrected / divisor;
+ }
+
+ /**
+ * Returns the variance of the entries in the input array, using the
+ * precomputed mean value. Returns <code>Double.NaN</code> if the array
+ * is empty.
+ * <p>
+ * See {@link Variance} for details on the computing algorithm.</p>
+ * <p>
+ * If <code>isBiasCorrected</code> is <code>true</code> the formula used
+ * assumes that the supplied mean value is the arithmetic mean of the
+ * sample data, not a known population parameter. If the mean is a known
+ * population parameter, or if the "population" version of the variance is
+ * desired, set <code>isBiasCorrected</code> to <code>false</code> before
+ * invoking this method.</p>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param mean the precomputed mean value
+ * @return the variance of the values or Double.NaN if the array is empty
+ * @throws IllegalArgumentException if the array is null
+ */
+ public double evaluate(final double[] values, final double mean) {
+     // Whole-array convenience overload: start at index 0 and cover every entry.
+     final int count = values.length;
+     return evaluate(values, mean, 0, count);
+ }
+
+ /**
+ * Returns the weighted variance of the entries in the specified portion of
+ * the input array, using the precomputed weighted mean value. Returns
+ * <code>Double.NaN</code> if the designated subarray is empty.
+ * <p>
+ * Uses the formula <pre>
+ * &Sigma;(weights[i]*(values[i] - mean)<sup>2</sup>)/(&Sigma;(weights[i]) - 1)
+ * </pre></p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the weighted arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * This formula will not return the same result as the unweighted variance when all
+ * weights are equal, unless all weights are equal to 1. The formula assumes that
+ * weights are to be treated as "expansion values," as will be the case if for example
+ * the weights represent frequency counts. To normalize weights so that the denominator
+ * in the variance computation equals the length of the input vector minus one, use <pre>
+ * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code>
+ * </pre>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * <li>the start and length arguments do not determine a valid array</li>
+ * </ul></p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @param mean the precomputed weighted mean value
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights,
+         final double mean, final int begin, final int length) {
+
+     double var = Double.NaN;
+
+     // test(...) is defined outside this excerpt; when it returns false the
+     // result stays NaN.
+     if (test(values, weights, begin, length)) {
+         if (length == 1) {
+             // a single observation has no spread
+             var = 0.0;
+         } else if (length > 1) {
+             double accum = 0.0;  // sum of w * (x - mean)^2 over the window
+             double accum2 = 0.0; // sum of w * (x - mean) over the window
+             double sumWts = 0.0; // sum of w over the window
+
+             // All three sums must range over the SAME window
+             // [begin, begin + length). Summing the weights over the whole
+             // array would yield a wrong denominator whenever the window is a
+             // proper subarray.
+             final int end = begin + length;
+             for (int i = begin; i < end; i++) {
+                 final double dev = values[i] - mean;
+                 accum += weights[i] * (dev * dev);
+                 accum2 += weights[i] * dev;
+                 sumWts += weights[i];
+             }
+
+             // The subtracted term vanishes when mean is exactly the weighted
+             // mean of the window; otherwise it compensates for the offset.
+             final double corrected = accum - (accum2 * accum2 / sumWts);
+             if (isBiasCorrected) {
+                 var = corrected / (sumWts - 1.0);
+             } else {
+                 var = corrected / sumWts;
+             }
+         }
+     }
+     return var;
+ }
+
+ /**
+ * <p>Returns the weighted variance of the values in the input array, using
+ * the precomputed weighted mean value.</p>
+ * <p>
+ * Uses the formula <pre>
+ * &Sigma;(weights[i]*(values[i] - mean)<sup>2</sup>)/(&Sigma;(weights[i]) - 1)
+ * </pre></p>
+ * <p>
+ * The formula used assumes that the supplied mean value is the weighted arithmetic
+ * mean of the sample data, not a known population parameter. This method
+ * is supplied only to save computation when the mean has already been
+ * computed.</p>
+ * <p>
+ * This formula will not return the same result as the unweighted variance when all
+ * weights are equal, unless all weights are equal to 1. The formula assumes that
+ * weights are to be treated as "expansion values," as will be the case if for example
+ * the weights represent frequency counts. To normalize weights so that the denominator
+ * in the variance computation equals the length of the input vector minus one, use <pre>
+ * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code>
+ * </pre>
+ * <p>
+ * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if any of the following are true:
+ * <ul><li>the values array is null</li>
+ * <li>the weights array is null</li>
+ * <li>the weights array does not have the same length as the values array</li>
+ * <li>the weights array contains one or more infinite values</li>
+ * <li>the weights array contains one or more NaN values</li>
+ * <li>the weights array contains negative values</li>
+ * </ul></p>
+ * <p>
+ * Does not change the internal state of the statistic.</p>
+ *
+ * @param values the input array
+ * @param weights the weights array
+ * @param mean the precomputed weighted mean value
+ * @return the variance of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the parameters are not valid
+ * @since 2.1
+ */
+ public double evaluate(final double[] values, final double[] weights, final double mean) {
+     // Whole-array convenience overload: start at index 0 and cover every entry.
+     final int count = values.length;
+     return evaluate(values, weights, mean, 0, count);
+ }
+
+ /**
+ * @return Returns the isBiasCorrected.
+ */
+ public boolean isBiasCorrected() {
+     // plain accessor for the bias-correction flag
+     return this.isBiasCorrected;
+ }
+
+ /**
+ * @param biasCorrected The isBiasCorrected to set.
+ */
+ public void setBiasCorrected(boolean biasCorrected) {
+     // plain mutator for the bias-correction flag read by the evaluate methods
+     isBiasCorrected = biasCorrected;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public Variance copy() {
+     // start from a default instance and let the static helper fill it in
+     final Variance duplicate = new Variance();
+     copy(this, duplicate);
+     return duplicate;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source Variance to copy
+ * @param dest Variance to copy to
+ * @throws NullArgumentException if either source or dest is null
+ */
+ public static void copy(Variance source, Variance dest) {
+     // Reject a missing argument before touching either instance.
+     final boolean missingArgument = (source == null) || (dest == null);
+     if (missingArgument) {
+         throw new NullArgumentException();
+     }
+     // data reference handled through the accessors declared outside this excerpt
+     dest.setData(source.getDataRef());
+     // the moment goes through its own copy(); the other fields are assigned directly
+     dest.moment = source.moment.copy();
+     dest.incMoment = source.incMoment;
+     dest.isBiasCorrected = source.isBiasCorrected;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialCovariance.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialCovariance.java
new file mode 100644
index 0000000..71afc68
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialCovariance.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import org.apache.commons.math.DimensionMismatchException;
+import org.apache.commons.math.linear.MatrixUtils;
+import org.apache.commons.math.linear.RealMatrix;
+
+/**
+ * Returns the covariance matrix of the available vectors.
+ * @since 1.2
+ * @version $Revision: 922714 $ $Date: 2010-03-14 02:35:14 +0100 (dim. 14 mars 2010) $
+ */
+public class VectorialCovariance implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 4118372414238930270L;
+
+ /** Sums for each component. */
+ private final double[] sums;
+
+ /** Sums of products for each component. */
+ private final double[] productsSums;
+
+ /** Indicator for bias correction. */
+ private final boolean isBiasCorrected;
+
+ /** Number of vectors in the sample. */
+ private long n;
+
+ /** Constructs a VectorialCovariance.
+ * @param dimension vectors dimension
+ * @param isBiasCorrected if true, computed the unbiased sample covariance,
+ * otherwise computes the biased population covariance
+ */
+ public VectorialCovariance(int dimension, boolean isBiasCorrected) {
+ sums = new double[dimension];
+ productsSums = new double[dimension * (dimension + 1) / 2];
+ n = 0;
+ this.isBiasCorrected = isBiasCorrected;
+ }
+
+ /**
+ * Add a new vector to the sample.
+ * @param v vector to add
+ * @exception DimensionMismatchException if the vector does not have the right dimension
+ */
+ public void increment(double[] v) throws DimensionMismatchException {
+ if (v.length != sums.length) {
+ throw new DimensionMismatchException(v.length, sums.length);
+ }
+ int k = 0;
+ for (int i = 0; i < v.length; ++i) {
+ sums[i] += v[i];
+ for (int j = 0; j <= i; ++j) {
+ productsSums[k++] += v[i] * v[j];
+ }
+ }
+ n++;
+ }
+
+ /**
+ * Get the covariance matrix.
+ * @return covariance matrix
+ */
+ public RealMatrix getResult() {
+
+ int dimension = sums.length;
+ RealMatrix result = MatrixUtils.createRealMatrix(dimension, dimension);
+
+ if (n > 1) {
+ double c = 1.0 / (n * (isBiasCorrected ? (n - 1) : n));
+ int k = 0;
+ for (int i = 0; i < dimension; ++i) {
+ for (int j = 0; j <= i; ++j) {
+ double e = c * (n * productsSums[k++] - sums[i] * sums[j]);
+ result.setEntry(i, j, e);
+ result.setEntry(j, i, e);
+ }
+ }
+ }
+
+ return result;
+
+ }
+
+ /**
+ * Get the number of vectors in the sample.
+ * @return number of vectors in the sample
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * Clears the internal state of the Statistic
+ */
+ public void clear() {
+ n = 0;
+ Arrays.fill(sums, 0.0);
+ Arrays.fill(productsSums, 0.0);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (isBiasCorrected ? 1231 : 1237);
+ result = prime * result + (int) (n ^ (n >>> 32));
+ result = prime * result + Arrays.hashCode(productsSums);
+ result = prime * result + Arrays.hashCode(sums);
+ return result;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!(obj instanceof VectorialCovariance))
+ return false;
+ VectorialCovariance other = (VectorialCovariance) obj;
+ if (isBiasCorrected != other.isBiasCorrected)
+ return false;
+ if (n != other.n)
+ return false;
+ if (!Arrays.equals(productsSums, other.productsSums))
+ return false;
+ if (!Arrays.equals(sums, other.sums))
+ return false;
+ return true;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialMean.java b/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialMean.java
new file mode 100644
index 0000000..ef57657
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/VectorialMean.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.moment;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import org.apache.commons.math.DimensionMismatchException;
+
+/**
+ * Returns the arithmetic mean of the available vectors.
+ * @since 1.2
+ * @version $Revision: 922714 $ $Date: 2010-03-14 02:35:14 +0100 (dim. 14 mars 2010) $
+ */
+public class VectorialMean implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = 8223009086481006892L;
+
+    /** One running {@link Mean} per vector component. */
+    private final Mean[] means;
+
+    /**
+     * Constructs a VectorialMean.
+     * @param dimension vectors dimension
+     */
+    public VectorialMean(int dimension) {
+        final Mean[] perComponent = new Mean[dimension];
+        for (int i = 0; i < perComponent.length; ++i) {
+            perComponent[i] = new Mean();
+        }
+        means = perComponent;
+    }
+
+    /**
+     * Adds a new vector to the sample.
+     * @param v vector to add
+     * @exception DimensionMismatchException if the vector does not have the right dimension
+     */
+    public void increment(double[] v) throws DimensionMismatchException {
+        final int dimension = means.length;
+        if (v.length != dimension) {
+            throw new DimensionMismatchException(v.length, dimension);
+        }
+        // feed every component into its own running mean
+        for (int i = 0; i < dimension; ++i) {
+            means[i].increment(v[i]);
+        }
+    }
+
+    /**
+     * Gets the mean vector.
+     * @return mean vector, as a newly allocated array
+     */
+    public double[] getResult() {
+        final int dimension = means.length;
+        final double[] meanVector = new double[dimension];
+        for (int i = 0; i < dimension; ++i) {
+            meanVector[i] = means[i].getResult();
+        }
+        return meanVector;
+    }
+
+    /**
+     * Gets the number of vectors in the sample.
+     * @return number of vectors in the sample
+     */
+    public long getN() {
+        // with no component there is nothing to ask; otherwise the count is
+        // read from the first component
+        if (means.length == 0) {
+            return 0;
+        }
+        return means[0].getN();
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        return prime + Arrays.hashCode(means);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (!(obj instanceof VectorialMean)) {
+            return false;
+        }
+        final VectorialMean that = (VectorialMean) obj;
+        return Arrays.equals(means, that.means);
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/moment/package.html b/src/main/java/org/apache/commons/math/stat/descriptive/moment/package.html
new file mode 100644
index 0000000..e024095
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/moment/package.html
@@ -0,0 +1,20 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>Summary statistics based on moments.</body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/package.html b/src/main/java/org/apache/commons/math/stat/descriptive/package.html
new file mode 100644
index 0000000..981fda4
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/package.html
@@ -0,0 +1,41 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $ -->
+ <body>
+ Generic univariate summary statistic objects.
+
+ <h3>UnivariateStatistic API Usage Examples:</h3>
+ <h4>UnivariateStatistic:</h4>
+ <code>/* evaluation approach */<br/> double[] values = new double[] { 1, 2,
+ 3, 4, 5 };<br/> <span style="font-weight: bold;">UnivariateStatistic stat
+ = new Mean();</span><br/> System.out.println("mean = " + <span
+ style="font-weight: bold;">stat.evaluate(values)</span>);<br/> </code>
+ <h4>StorelessUnivariateStatistic:</h4>
+ <code>/* incremental approach */<br/> double[] values = new double[] { 1, 2,
+ 3, 4, 5 };<br/> <span style="font-weight: bold;">
+ StorelessUnivariateStatistic stat = new Mean();</span><br/>
+ System.out.println("mean before adding a value is NaN = " + <span
+ style="font-weight: bold;">stat.getResult()</span>);<br/> for (int i = 0;
+ i &lt; values.length; i++) {<br/> &nbsp;&nbsp;&nbsp; <span
+ style="font-weight: bold;">stat.increment(values[i]);</span><br/> &nbsp;&nbsp;&nbsp;
+ System.out.println("current mean = " + <span style="font-weight: bold;">
+ stat.getResult()</span>);<br/> }<br/> <span style="font-weight: bold;">
+ stat.clear();</span><br/> System.out.println("mean after clear is NaN = "
+ + <span style="font-weight: bold;">stat.getResult()</span>);</code>
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/rank/Max.java b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Max.java
new file mode 100644
index 0000000..1b15750
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Max.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.rank;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+/**
+ * Returns the maximum of the available values.
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
+ * the result is <code>Double.POSITIVE_INFINITY.</code></li>
+ * </ul></p>
+* <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Max extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -5593383832225844641L;
+
+    /** Count of values passed to {@link #increment(double)} since creation or the last clear. */
+    private long n;
+
+    /** Largest value seen so far; NaN while no non-NaN value has been added. */
+    private double value;
+
+    /**
+     * Creates an empty Max instance whose result is NaN.
+     */
+    public Max() {
+        value = Double.NaN;
+        n = 0;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Max} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Max} instance to copy
+     */
+    public Max(Max original) {
+        copy(original, this);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void increment(final double d) {
+        // A NaN current value is always replaced; a NaN argument never
+        // replaces a non-NaN value because the comparison is then false.
+        final boolean replace = Double.isNaN(value) || d > value;
+        if (replace) {
+            value = d;
+        }
+        n++;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear() {
+        n = 0;
+        value = Double.NaN;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public double getResult() {
+        return value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getN() {
+        return n;
+    }
+
+    /**
+     * Returns the maximum of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if the array is null or
+     * the array index parameters are not valid.</p>
+     * <p>
+     * <ul>
+     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+     * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
+     * the result is <code>Double.POSITIVE_INFINITY.</code></li>
+     * </ul></p>
+     *
+     * @param values the input array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the maximum of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the array is null or the array index
+     * parameters are not valid
+     */
+    @Override
+    public double evaluate(final double[] values, final int begin, final int length) {
+        // test(...) is inherited; a false result means NaN
+        if (!test(values, begin, length)) {
+            return Double.NaN;
+        }
+        double best = values[begin];
+        final int end = begin + length;
+        for (int idx = begin; idx < end; idx++) {
+            final double candidate = values[idx];
+            if (Double.isNaN(candidate)) {
+                continue; // NaN entries never influence the result
+            }
+            // also replaces a NaN starting value, since NaN > x is false
+            if (!(best > candidate)) {
+                best = candidate;
+            }
+        }
+        return best;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Max copy() {
+        final Max duplicate = new Max();
+        copy(this, duplicate);
+        return duplicate;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     *
+     * @param source Max to copy
+     * @param dest Max to copy to
+     * @throws NullPointerException if either source or dest is null
+     */
+    public static void copy(Max source, Max dest) {
+        // data reference handled through the inherited accessors
+        dest.setData(source.getDataRef());
+        dest.value = source.value;
+        dest.n = source.n;
+    }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/rank/Median.java b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Median.java
new file mode 100644
index 0000000..6e13b13
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Median.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.rank;
+
+import java.io.Serializable;
+
+
+/**
+ * Returns the median of the available values. This is the same as the 50th percentile.
+ * See {@link Percentile} for a description of the algorithm used.
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public class Median extends Percentile implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -3961477041290915687L;
+
+    /** Percentile rank handed to {@link Percentile}: the median is the 50th percentile. */
+    private static final double MEDIAN_QUANTILE = 50.0;
+
+    /**
+     * Default constructor; configures the parent for the 50th percentile.
+     */
+    public Median() {
+        super(MEDIAN_QUANTILE);
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Median} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Median} instance to copy
+     */
+    public Median(Median original) {
+        super(original);
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/rank/Min.java b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Min.java
new file mode 100644
index 0000000..1c264c6
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Min.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.rank;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+/**
+ * Returns the minimum of the available values.
+ * <p>
+ * <ul>
+ * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+ * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+ * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
+ * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
+ * </ul></p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Min extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -2941995784909003131L;
+
+    /** Count of values passed to {@link #increment(double)} since creation or the last clear. */
+    private long n;
+
+    /** Smallest value seen so far; NaN while no non-NaN value has been added. */
+    private double value;
+
+    /**
+     * Creates an empty Min instance whose result is NaN.
+     */
+    public Min() {
+        value = Double.NaN;
+        n = 0;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Min} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Min} instance to copy
+     */
+    public Min(Min original) {
+        copy(original, this);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void increment(final double d) {
+        // A NaN current value is always replaced; a NaN argument never
+        // replaces a non-NaN value because the comparison is then false.
+        final boolean replace = Double.isNaN(value) || d < value;
+        if (replace) {
+            value = d;
+        }
+        n++;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear() {
+        n = 0;
+        value = Double.NaN;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public double getResult() {
+        return value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getN() {
+        return n;
+    }
+
+    /**
+     * Returns the minimum of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if the array is null or
+     * the array index parameters are not valid.</p>
+     * <p>
+     * <ul>
+     * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
+     * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
+     * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
+     * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
+     * </ul> </p>
+     *
+     * @param values the input array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the minimum of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the array is null or the array index
+     * parameters are not valid
+     */
+    @Override
+    public double evaluate(final double[] values,final int begin, final int length) {
+        // test(...) is inherited; a false result means NaN
+        if (!test(values, begin, length)) {
+            return Double.NaN;
+        }
+        double best = values[begin];
+        final int end = begin + length;
+        for (int idx = begin; idx < end; idx++) {
+            final double candidate = values[idx];
+            if (Double.isNaN(candidate)) {
+                continue; // NaN entries never influence the result
+            }
+            // also replaces a NaN starting value, since NaN < x is false
+            if (!(best < candidate)) {
+                best = candidate;
+            }
+        }
+        return best;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Min copy() {
+        final Min duplicate = new Min();
+        copy(this, duplicate);
+        return duplicate;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     *
+     * @param source Min to copy
+     * @param dest Min to copy to
+     * @throws NullPointerException if either source or dest is null
+     */
+    public static void copy(Min source, Min dest) {
+        // data reference handled through the inherited accessors
+        dest.setData(source.getDataRef());
+        dest.value = source.value;
+        dest.n = source.n;
+    }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/rank/Percentile.java b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Percentile.java
new file mode 100644
index 0000000..0c8a90f
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/rank/Percentile.java
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.rank;
+
+import java.io.Serializable;
+import java.util.Arrays;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Provides percentile computation.
+ * <p>
+ * There are several commonly used methods for estimating percentiles (a.k.a.
+ * quantiles) based on sample data. For large samples, the different methods
+ * agree closely, but when sample sizes are small, different methods will give
+ * significantly different results. The algorithm implemented here works as follows:
+ * <ol>
+ * <li>Let <code>n</code> be the length of the (sorted) array and
+ * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
+ * <li>If <code> n = 1 </code> return the unique array element (regardless of
+ * the value of <code>p</code>); otherwise </li>
+ * <li>Compute the estimated percentile position
+ * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
+ * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
+ * part of <code>pos</code>). If <code>pos &lt; 1</code> return the smallest
+ * element in the array; if <code>pos &gt;= n</code> return the largest
+ * element in the array; otherwise</li>
+ * <li>Let <code>lower</code> be the element in position
+ * <code>floor(pos)</code> in the array and let <code>upper</code> be the
+ * next element in the array. Return <code>lower + d * (upper - lower)</code>
+ * </li>
+ * </ol></p>
+ * <p>
+ * To compute percentiles, the data must be at least partially ordered. Input
+ * arrays are copied and recursively partitioned using an ordering definition.
+ * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
+ * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
+ * <code>Double.NaN</code> larger than any other value (including
+ * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
+ * (50th percentile) of
+ * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
+ * <p>
+ * Since percentile estimation usually involves interpolation between array
+ * elements, arrays containing <code>NaN</code> or infinite values will often
+ * result in <code>NaN</code> or infinite values returned.</p>
+ * <p>
+ * Since 2.2, Percentile implementation uses only selection instead of complete
+ * sorting and caches selection algorithm state between calls to the various
+ * {@code evaluate} methods when several percentiles are to be computed on the same data.
+ * The cached state is only reused when the array handed to {@code evaluate}
+ * is the very array stored in this instance (the comparison is on the array
+ * reference, not on its content) and the whole array is requested; that array
+ * is then reordered in place. Any other input is copied into a scratch array
+ * and processed with a fresh, call-local cache, so the caller's array is left
+ * untouched.
+ * </p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads modifies it (for example through <code>setData</code>,
+ * <code>setQuantile</code>, or an <code>evaluate</code> call on the stored
+ * data, which reorders that data and updates the pivot cache), it must be
+ * synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Percentile extends AbstractUnivariateStatistic implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -8091216485095130416L;
+
+    /** Minimum size under which we use a simple insertion sort rather than Hoare's select. */
+    private static final int MIN_SELECT_SIZE = 15;
+
+    /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
+    private static final int MAX_CACHED_LEVELS = 10;
+
+    /** Determines what percentile is computed when evaluate() is activated
+     * with no quantile argument */
+    private double quantile = 0.0;
+
+    /** Cached pivots (null as long as no data is stored). */
+    private int[] cachedPivots;
+
+    /**
+     * Constructs a Percentile with a default quantile
+     * value of 50.0.
+     */
+    public Percentile() {
+        this(50.0);
+    }
+
+    /**
+     * Constructs a Percentile with the specific quantile value.
+     * @param p the quantile
+     * @throws IllegalArgumentException  if p is not greater than 0 and less
+     * than or equal to 100
+     */
+    public Percentile(final double p) {
+        setQuantile(p);
+        cachedPivots = null;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Percentile} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Percentile} instance to copy
+     */
+    public Percentile(Percentile original) {
+        copy(original, this);
+    }
+
+    /**
+     * Creates an empty pivots cache.
+     *
+     * @return a new implicit heap with room for {@code MAX_CACHED_LEVELS}
+     * levels of pivots, every slot set to -1 (meaning "no pivot known yet")
+     */
+    private static int[] newPivotsHeap() {
+        final int[] heap = new int[(0x1 << MAX_CACHED_LEVELS) - 1];
+        Arrays.fill(heap, -1);
+        return heap;
+    }
+
+    /**
+     * {@inheritDoc}
+     * <p>
+     * Also resets the pivot cache: it is dropped when {@code values} is null
+     * and replaced by an empty cache otherwise.</p>
+     */
+    @Override
+    public void setData(final double[] values) {
+        cachedPivots = (values == null) ? null : newPivotsHeap();
+        super.setData(values);
+    }
+
+    /**
+     * {@inheritDoc}
+     * <p>
+     * Also resets the pivot cache: it is dropped when {@code values} is null
+     * and replaced by an empty cache otherwise.</p>
+     */
+    @Override
+    public void setData(final double[] values, final int begin, final int length) {
+        cachedPivots = (values == null) ? null : newPivotsHeap();
+        super.setData(values, begin, length);
+    }
+
+    /**
+     * Returns the result of evaluating the statistic over the stored data.
+     * <p>
+     * The stored array is the one which was set by previous calls to
+     * {@code setData}.
+     * </p>
+     * @param p the percentile value to compute
+     * @return the value of the statistic applied to the stored data
+     */
+    public double evaluate(final double p) {
+        return evaluate(getDataRef(), p);
+    }
+
+    /**
+     * Returns an estimate of the <code>p</code>th percentile of the values
+     * in the <code>values</code> array.
+     * <p>
+     * Calls to this method do not modify the internal <code>quantile</code>
+     * state of this statistic.</p>
+     * <p>
+     * <ul>
+     * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
+     * <code>0</code></li>
+     * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
+     *  if <code>values</code> has  length <code>1</code></li>
+     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
+     * is null or p is not a valid quantile value (p must be greater than 0
+     * and less than or equal to 100) </li>
+     * </ul></p>
+     * <p>
+     * See {@link Percentile} for a description of the percentile estimation
+     * algorithm used.</p>
+     *
+     * @param values input array of values
+     * @param p the percentile value to compute
+     * @return the percentile value or Double.NaN if the array is empty
+     * @throws IllegalArgumentException if <code>values</code> is null
+     *     or p is invalid
+     */
+    public double evaluate(final double[] values, final double p) {
+        // validate first so that a null array is rejected before
+        // values.length is dereferenced
+        test(values, 0, 0);
+        return evaluate(values, 0, values.length, p);
+    }
+
+    /**
+     * Returns an estimate of the <code>quantile</code>th percentile of the
+     * designated values in the <code>values</code> array.  The quantile
+     * estimated is determined by the <code>quantile</code> property.
+     * <p>
+     * <ul>
+     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
+     * <li>Returns (for any value of <code>quantile</code>)
+     * <code>values[begin]</code> if <code>length = 1 </code></li>
+     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
+     * is null,  or <code>start</code> or <code>length</code>
+     * is invalid</li>
+     * </ul></p>
+     * <p>
+     * See {@link Percentile} for a description of the percentile estimation
+     * algorithm used.</p>
+     *
+     * @param values the input array
+     * @param start index of the first array element to include
+     * @param length the number of elements to include
+     * @return the percentile value
+     * @throws IllegalArgumentException if the parameters are not valid
+     *
+     */
+    @Override
+    public double evaluate( final double[] values, final int start, final int length) {
+        return evaluate(values, start, length, quantile);
+    }
+
+    /**
+     * Returns an estimate of the <code>p</code>th percentile of the values
+     * in the <code>values</code> array, starting with the element in (0-based)
+     * position <code>begin</code> in the array and including <code>length</code>
+     * values.
+     * <p>
+     * Calls to this method do not modify the internal <code>quantile</code>
+     * state of this statistic.</p>
+     * <p>
+     * <ul>
+     * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
+     * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
+     *  if <code>length = 1 </code></li>
+     * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
+     *  is null , <code>begin</code> or <code>length</code> is invalid, or
+     * <code>p</code> is not a valid quantile value (p must be greater than 0
+     * and less than or equal to 100)</li>
+     * </ul></p>
+     * <p>
+     * The stored data and its pivot cache are used (and reordered in place)
+     * only when <code>values</code> is the stored array itself and the range
+     * covers it entirely; in every other case the designated range is copied
+     * and the copy is processed with a temporary cache.</p>
+     * <p>
+     * See {@link Percentile} for a description of the percentile estimation
+     * algorithm used.</p>
+     *
+     * @param values array of input values
+     * @param begin  the first (0-based) element to include in the computation
+     * @param length  the number of array elements to include
+     * @param p  the percentile to compute
+     * @return  the percentile value
+     * @throws IllegalArgumentException if the parameters are not valid or the
+     * input array is null
+     */
+    public double evaluate(final double[] values, final int begin,
+                           final int length, final double p) {
+
+        test(values, begin, length);
+
+        if ((p > 100) || (p <= 0)) {
+            throw MathRuntimeException.createIllegalArgumentException(
+                  LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p);
+        }
+        if (length == 0) {
+            return Double.NaN;
+        }
+        if (length == 1) {
+            return values[begin]; // always return single value for n = 1
+        }
+        double n = length;
+        double pos = p * (n + 1) / 100;
+        double fpos = FastMath.floor(pos);
+        int intPos = (int) fpos;
+        double dif = pos - fpos;
+        double[] work;
+        int[] pivotsHeap;
+        // select() always operates on the complete work array, so the stored
+        // data (and its cached pivots) may only be used directly when the
+        // requested range is the whole stored array; a sub-range of it must
+        // be copied like any foreign array.
+        if ((values == getDataRef()) && (begin == 0) && (length == values.length)) {
+            work = getDataRef();
+            pivotsHeap = cachedPivots;
+        } else {
+            work = new double[length];
+            System.arraycopy(values, begin, work, 0, length);
+            pivotsHeap = newPivotsHeap();
+        }
+
+        if (pos < 1) {
+            return select(work, pivotsHeap, 0);
+        }
+        if (pos >= n) {
+            return select(work, pivotsHeap, length - 1);
+        }
+        // pos is 1-based, the work array is 0-based
+        double lower = select(work, pivotsHeap, intPos - 1);
+        double upper = select(work, pivotsHeap, intPos);
+        return lower + dif * (upper - lower);
+    }
+
+    /**
+     * Select the k<sup>th</sup> smallest element from work array
+     * @param work work array (will be reorganized during the call)
+     * @param pivotsHeap set of pivot index corresponding to elements that
+     * are already at their sorted location, stored as an implicit heap
+     * (i.e. a sorted binary tree stored in a flat array, where the
+     * children of a node at index n are at indices 2n+1 for the left
+     * child and 2n+2 for the right child, with 0-based indices)
+     * @param k index of the desired element
+     * @return k<sup>th</sup> smallest element
+     */
+    private double select(final double[] work, final int[] pivotsHeap, final int k) {
+
+        int begin = 0;
+        int end   = work.length;
+        int node  = 0;
+
+        while (end - begin > MIN_SELECT_SIZE) {
+
+            final int pivot;
+            if ((node < pivotsHeap.length) && (pivotsHeap[node] >= 0)) {
+                // the pivot has already been found in a previous call
+                // and the array has already been partitioned around it
+                pivot = pivotsHeap[node];
+            } else {
+                // select a pivot and partition work array around it
+                pivot = partition(work, begin, end, medianOf3(work, begin, end));
+                if (node < pivotsHeap.length) {
+                    pivotsHeap[node] =  pivot;
+                }
+            }
+
+            if (k == pivot) {
+                // the pivot was exactly the element we wanted
+                return work[k];
+            } else if (k < pivot) {
+                // the element is in the left partition
+                end  = pivot;
+                node = Math.min(2 * node + 1, pivotsHeap.length); // the min is here to avoid integer overflow
+            } else {
+                // the element is in the right partition
+                begin = pivot + 1;
+                node  = Math.min(2 * node + 2, pivotsHeap.length); // the min is here to avoid integer overflow
+            }
+
+        }
+
+        // the element is somewhere in the small sub-array
+        // sort the sub-array using insertion sort
+        insertionSort(work, begin, end);
+        return work[k];
+
+    }
+
+    /** Select a pivot index as the median of three
+     * @param work data array
+     * @param begin index of the first element of the slice
+     * @param end index after the last element of the slice
+     * @return the index of the median element chosen between the
+     * first, the middle and the last element of the array slice
+     */
+    int medianOf3(final double[] work, final int begin, final int end) {
+
+        final int inclusiveEnd = end - 1;
+        final int    middle    = begin + (inclusiveEnd - begin) / 2;
+        final double wBegin    = work[begin];
+        final double wMiddle   = work[middle];
+        final double wEnd      = work[inclusiveEnd];
+
+        if (wBegin < wMiddle) {
+            if (wMiddle < wEnd) {
+                return middle;
+            } else {
+                return (wBegin < wEnd) ? inclusiveEnd : begin;
+            }
+        } else {
+            if (wBegin < wEnd) {
+                return begin;
+            } else {
+                return (wMiddle < wEnd) ? inclusiveEnd : middle;
+            }
+        }
+
+    }
+
+    /**
+     * Partition an array slice around a pivot
+     * <p>
+     * Partitioning exchanges array elements such that all elements
+     * smaller than pivot are before it and all elements larger than
+     * pivot are after it
+     * </p>
+     * @param work data array
+     * @param begin index of the first element of the slice
+     * @param end index after the last element of the slice
+     * @param pivot initial index of the pivot
+     * @return index of the pivot after partition
+     */
+    private int partition(final double[] work, final int begin, final int end, final int pivot) {
+
+        // park the pivot value aside; slot "begin" is refilled at the end
+        final double value = work[pivot];
+        work[pivot] = work[begin];
+
+        int i = begin + 1;
+        int j = end - 1;
+        while (i < j) {
+            while ((i < j) && (work[j] >= value)) {
+                --j;
+            }
+            while ((i < j) && (work[i] <= value)) {
+                ++i;
+            }
+
+            if (i < j) {
+                final double tmp = work[i];
+                work[i++] = work[j];
+                work[j--] = tmp;
+            }
+        }
+
+        // step back when i points past the slice or at an element that
+        // belongs to the right-hand side
+        if ((i >= end) || (work[i] > value)) {
+            --i;
+        }
+        work[begin] = work[i];
+        work[i]     = value;
+        return i;
+
+    }
+
+    /**
+     * Sort in place a (small) array slice using insertion sort
+     * @param work array to sort
+     * @param begin index of the first element of the slice to sort
+     * @param end index after the last element of the slice to sort
+     */
+    private void insertionSort(final double[] work, final int begin, final int end) {
+        for (int j = begin + 1; j < end; j++) {
+            final double saved = work[j];
+            int i = j - 1;
+            while ((i >= begin) && (saved < work[i])) {
+                work[i + 1] = work[i];
+                i--;
+            }
+            work[i + 1] = saved;
+        }
+    }
+
+    /**
+     * Returns the value of the quantile field (determines what percentile is
+     * computed when evaluate() is called with no quantile argument).
+     *
+     * @return quantile
+     */
+    public double getQuantile() {
+        return quantile;
+    }
+
+    /**
+     * Sets the value of the quantile field (determines what percentile is
+     * computed when evaluate() is called with no quantile argument).
+     *
+     * @param p a value such that 0 &lt; p &lt;= 100
+     * @throws IllegalArgumentException  if p is not greater than 0 and less
+     * than or equal to 100
+     */
+    public void setQuantile(final double p) {
+        if (p <= 0 || p > 100) {
+            throw MathRuntimeException.createIllegalArgumentException(
+                  LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p);
+        }
+        quantile = p;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Percentile copy() {
+        Percentile result = new Percentile();
+        copy(this, result);
+        return result;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     * <p>The stored data is handed to {@code dest.setData}, the cached pivots
+     * (if any) are copied over, and the quantile is copied.</p>
+     *
+     * @param source Percentile to copy
+     * @param dest Percentile to copy to
+     * @throws NullPointerException if either source or dest is null
+     */
+    public static void copy(Percentile source, Percentile dest) {
+        // setData() also gives dest a fresh pivot cache to copy into
+        dest.setData(source.getDataRef());
+        if (source.cachedPivots != null) {
+            System.arraycopy(source.cachedPivots, 0, dest.cachedPivots, 0, source.cachedPivots.length);
+        }
+        dest.quantile = source.quantile;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/rank/package.html b/src/main/java/org/apache/commons/math/stat/descriptive/rank/package.html
new file mode 100644
index 0000000..c69107b
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/rank/package.html
@@ -0,0 +1,20 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>Summary statistics based on ranks.</body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java
new file mode 100644
index 0000000..c7d1d76
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Product.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.summary;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.stat.descriptive.WeightedEvaluation;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Returns the product of the available values.
+ * <p>
+ * If there are no values in the dataset, or any of the values are
+ * <code>NaN</code>, then <code>NaN</code> is returned.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Product extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = 2824226005990582538L;
+
+    /** The number of values that have been added */
+    private long n;
+
+    /**
+     * The current Running Product.
+     */
+    private double value;
+
+    /**
+     * Create a Product instance
+     */
+    public Product() {
+        n = 0;
+        value = Double.NaN;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Product} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Product} instance to copy
+     */
+    public Product(Product original) {
+        copy(original, this);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void increment(final double d) {
+        // the first value replaces the initial NaN instead of multiplying it
+        if (n == 0) {
+            value = d;
+        } else {
+            value *= d;
+        }
+        n++;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public double getResult() {
+        return value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getN() {
+        return n;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear() {
+        value = Double.NaN;
+        n = 0;
+    }
+
+    /**
+     * Returns the product of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+     *
+     * @param values the input array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the product of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the array is null or the array index
+     *  parameters are not valid
+     */
+    @Override
+    public double evaluate(final double[] values, final int begin, final int length) {
+        double product = Double.NaN;
+        if (test(values, begin, length)) {
+            product = 1.0;
+            for (int i = begin; i < begin + length; i++) {
+                product *= values[i];
+            }
+        }
+        return product;
+    }
+
+    /**
+     * <p>Returns the weighted product of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.</p>
+     *
+     * <p>Throws <code>IllegalArgumentException</code> if any of the following are true:
+     * <ul><li>the values array is null</li>
+     *     <li>the weights array is null</li>
+     *     <li>the weights array does not have the same length as the values array</li>
+     *     <li>the weights array contains one or more infinite values</li>
+     *     <li>the weights array contains one or more NaN values</li>
+     *     <li>the weights array contains negative values</li>
+     *     <li>the start and length arguments do not determine a valid array</li>
+     * </ul></p>
+     *
+     * <p>Uses the formula, <pre>
+     *    weighted product = &prod;values[i]<sup>weights[i]</sup>
+     * </pre>
+     * that is, the weights are applied as exponents when computing the weighted product.</p>
+     *
+     * @param values the input array
+     * @param weights the weights array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the product of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the parameters are not valid
+     * @since 2.1
+     */
+    public double evaluate(final double[] values, final double[] weights,
+                           final int begin, final int length) {
+        double product = Double.NaN;
+        if (test(values, weights, begin, length)) {
+            product = 1.0;
+            for (int i = begin; i < begin + length; i++) {
+                product *= FastMath.pow(values[i], weights[i]);
+            }
+        }
+        return product;
+    }
+
+    /**
+     * <p>Returns the weighted product of the entries in the input array.</p>
+     *
+     * <p>Throws <code>IllegalArgumentException</code> if any of the following are true:
+     * <ul><li>the values array is null</li>
+     *     <li>the weights array is null</li>
+     *     <li>the weights array does not have the same length as the values array</li>
+     *     <li>the weights array contains one or more infinite values</li>
+     *     <li>the weights array contains one or more NaN values</li>
+     *     <li>the weights array contains negative values</li>
+     * </ul></p>
+     *
+     * <p>Uses the formula, <pre>
+     *    weighted product = &prod;values[i]<sup>weights[i]</sup>
+     * </pre>
+     * that is, the weights are applied as exponents when computing the weighted product.</p>
+     *
+     * @param values the input array
+     * @param weights the weights array
+     * @return the product of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the parameters are not valid
+     * @since 2.1
+     */
+    public double evaluate(final double[] values, final double[] weights) {
+        // Validate the values array before values.length is dereferenced, so
+        // that a null array is reported through the argument check (the
+        // documented IllegalArgumentException) rather than surfacing as a
+        // NullPointerException.
+        test(values, 0, 0);
+        return evaluate(values, weights, 0, values.length);
+    }
+
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Product copy() {
+        Product result = new Product();
+        copy(this, result);
+        return result;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     *
+     * @param source Product to copy
+     * @param dest Product to copy to
+     * @throws NullPointerException if either source or dest is null
+     */
+    public static void copy(Product source, Product dest) {
+        dest.setData(source.getDataRef());
+        dest.n = source.n;
+        dest.value = source.value;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java
new file mode 100644
index 0000000..7188ea8
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/Sum.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.summary;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+
+/**
+ * Returns the sum of the available values.
+ * <p>
+ * If there are no values in the dataset, or any of the values are
+ * <code>NaN</code>, then <code>NaN</code> is returned.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class Sum extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -8231831954703408316L;
+
+    /** The number of values that have been added */
+    private long n;
+
+    /**
+     * The currently running sum.
+     */
+    private double value;
+
+    /**
+     * Create a Sum instance
+     */
+    public Sum() {
+        n = 0;
+        value = Double.NaN;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code Sum} identical
+     * to the {@code original}
+     *
+     * @param original the {@code Sum} instance to copy
+     */
+    public Sum(Sum original) {
+        copy(original, this);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void increment(final double d) {
+        // the first value replaces the initial NaN instead of being added to it
+        if (n == 0) {
+            value = d;
+        } else {
+            value += d;
+        }
+        n++;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public double getResult() {
+        return value;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public long getN() {
+        return n;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear() {
+        value = Double.NaN;
+        n = 0;
+    }
+
+    /**
+     * The sum of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+     *
+     * @param values the input array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the sum of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the array is null or the array index
+     *  parameters are not valid
+     */
+    @Override
+    public double evaluate(final double[] values, final int begin, final int length) {
+        double sum = Double.NaN;
+        if (test(values, begin, length)) {
+            sum = 0.0;
+            for (int i = begin; i < begin + length; i++) {
+                sum += values[i];
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * The weighted sum of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if any of the following are true:
+     * <ul><li>the values array is null</li>
+     *     <li>the weights array is null</li>
+     *     <li>the weights array does not have the same length as the values array</li>
+     *     <li>the weights array contains one or more infinite values</li>
+     *     <li>the weights array contains one or more NaN values</li>
+     *     <li>the weights array contains negative values</li>
+     *     <li>the start and length arguments do not determine a valid array</li>
+     * </ul></p>
+     * <p>
+     * Uses the formula, <pre>
+     *    weighted sum = &Sigma;(values[i] * weights[i])
+     * </pre></p>
+     *
+     * @param values the input array
+     * @param weights the weights array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the sum of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the parameters are not valid
+     * @since 2.1
+     */
+    public double evaluate(final double[] values, final double[] weights,
+                           final int begin, final int length) {
+        double sum = Double.NaN;
+        if (test(values, weights, begin, length)) {
+            sum = 0.0;
+            for (int i = begin; i < begin + length; i++) {
+                sum += values[i] * weights[i];
+            }
+        }
+        return sum;
+    }
+
+    /**
+     * The weighted sum of the entries in the input array.
+     * <p>
+     * Throws <code>IllegalArgumentException</code> if any of the following are true:
+     * <ul><li>the values array is null</li>
+     *     <li>the weights array is null</li>
+     *     <li>the weights array does not have the same length as the values array</li>
+     *     <li>the weights array contains one or more infinite values</li>
+     *     <li>the weights array contains one or more NaN values</li>
+     *     <li>the weights array contains negative values</li>
+     * </ul></p>
+     * <p>
+     * Uses the formula, <pre>
+     *    weighted sum = &Sigma;(values[i] * weights[i])
+     * </pre></p>
+     *
+     * @param values the input array
+     * @param weights the weights array
+     * @return the sum of the values or Double.NaN if length = 0
+     * @throws IllegalArgumentException if the parameters are not valid
+     * @since 2.1
+     */
+    public double evaluate(final double[] values, final double[] weights) {
+        // Validate the values array before values.length is dereferenced, so
+        // that a null array is reported through the argument check (the
+        // documented IllegalArgumentException) rather than surfacing as a
+        // NullPointerException.
+        test(values, 0, 0);
+        return evaluate(values, weights, 0, values.length);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public Sum copy() {
+        Sum result = new Sum();
+        copy(this, result);
+        return result;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     *
+     * @param source Sum to copy
+     * @param dest Sum to copy to
+     * @throws NullPointerException if either source or dest is null
+     */
+    public static void copy(Sum source, Sum dest) {
+        dest.setData(source.getDataRef());
+        dest.n = source.n;
+        dest.value = source.value;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfLogs.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfLogs.java
new file mode 100644
index 0000000..331d5d2
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfLogs.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.summary;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Returns the sum of the natural logs for this collection of values.
+ * <p>
+ * Uses {@link java.lang.Math#log(double)} to compute the logs. Therefore,
+ * <ul>
+ * <li>If any of values are < 0, the result is <code>NaN.</code></li>
+ * <li>If all values are non-negative and less than
+ * <code>Double.POSITIVE_INFINITY</code>, but at least one value is 0, the
+ * result is <code>Double.NEGATIVE_INFINITY.</code></li>
+ * <li>If both <code>Double.POSITIVE_INFINITY</code> and
+ * <code>Double.NEGATIVE_INFINITY</code> are among the values, the result is
+ * <code>NaN.</code></li>
+ * </ul></p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class SumOfLogs extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -370076995648386763L;
+
+ /**Number of values that have been added */
+ private int n;
+
+ /**
+ * The currently running value
+ */
+ private double value;
+
+ /**
+ * Create a SumOfLogs instance
+ */
+ public SumOfLogs() {
+ value = 0d;
+ n = 0;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code SumOfLogs} identical
+ * to the {@code original}
+ *
+ * @param original the {@code SumOfLogs} instance to copy
+ */
+ public SumOfLogs(SumOfLogs original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ value += FastMath.log(d);
+ n++;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ if (n > 0) {
+ return value;
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ value = 0d;
+ n = 0;
+ }
+
+ /**
+ * Returns the sum of the natural logs of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ * <p>
+ * See {@link SumOfLogs}.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the sum of the natural logs of the values or Double.NaN if
+ * length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values, final int begin, final int length) {
+ double sumLog = Double.NaN;
+ if (test(values, begin, length)) {
+ sumLog = 0.0;
+ for (int i = begin; i < begin + length; i++) {
+ sumLog += FastMath.log(values[i]);
+ }
+ }
+ return sumLog;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public SumOfLogs copy() {
+ SumOfLogs result = new SumOfLogs();
+ copy(this, result);
+ return result;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source SumOfLogs to copy
+ * @param dest SumOfLogs to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SumOfLogs source, SumOfLogs dest) {
+ dest.setData(source.getDataRef());
+ dest.n = source.n;
+ dest.value = source.value;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfSquares.java b/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfSquares.java
new file mode 100644
index 0000000..a632bf6
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/SumOfSquares.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.descriptive.summary;
+
+import java.io.Serializable;
+
+import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
+
+/**
+ * Returns the sum of the squares of the available values.
+ * <p>
+ * If there are no values in the dataset, or any of the values are
+ * <code>NaN</code>, then <code>NaN</code> is returned.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $
+ */
+public class SumOfSquares extends AbstractStorelessUnivariateStatistic implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = 1460986908574398008L;
+
+ /** Number of values that have been added */
+ private long n;
+
+ /**
+ * Running sum of squares; NaN until the first value is added
+ */
+ private double value;
+
+ /**
+ * Create an empty SumOfSquares instance
+ */
+ public SumOfSquares() {
+ value = Double.NaN;
+ n = 0;
+ }
+
+ /**
+ * Copy constructor, creates a new {@code SumOfSquares} identical
+ * to the {@code original}
+ *
+ * @param original the {@code SumOfSquares} instance to copy
+ */
+ public SumOfSquares(SumOfSquares original) {
+ copy(original, this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void increment(final double d) {
+ final double square = d * d;
+ // the first value replaces the NaN placeholder rather than adding to it
+ value = (n == 0)
+ ? square
+ : value + square;
+ n++;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public double getResult() {
+ return value;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void clear() {
+ n = 0;
+ value = Double.NaN;
+ }
+
+ /**
+ * Returns the sum of the squares of the entries in the specified portion of
+ * the input array, or <code>Double.NaN</code> if the designated subarray
+ * is empty.
+ * <p>
+ * Throws <code>IllegalArgumentException</code> if the array is null.</p>
+ *
+ * @param values the input array
+ * @param begin index of the first array element to include
+ * @param length the number of elements to include
+ * @return the sum of the squares of the values or Double.NaN if length = 0
+ * @throws IllegalArgumentException if the array is null or the array index
+ * parameters are not valid
+ */
+ @Override
+ public double evaluate(final double[] values, final int begin, final int length) {
+ if (!test(values, begin, length)) {
+ return Double.NaN;
+ }
+ double total = 0.0;
+ for (int idx = begin; idx < begin + length; idx++) {
+ total += values[idx] * values[idx];
+ }
+ return total;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public SumOfSquares copy() {
+ final SumOfSquares duplicate = new SumOfSquares();
+ copy(this, duplicate);
+ return duplicate;
+ }
+
+ /**
+ * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p>
+ *
+ * @param source SumOfSquares to copy
+ * @param dest SumOfSquares to copy to
+ * @throws NullPointerException if either source or dest is null
+ */
+ public static void copy(SumOfSquares source, SumOfSquares dest) {
+ dest.setData(source.getDataRef());
+ dest.n = source.n;
+ dest.value = source.value;
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/summary/package.html b/src/main/java/org/apache/commons/math/stat/descriptive/summary/package.html
new file mode 100644
index 0000000..db7f731
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/summary/package.html
@@ -0,0 +1,20 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>Other summary statistics.</body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTest.java b/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTest.java
new file mode 100644
index 0000000..6a3ecac
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTest.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+
+/**
+ * An interface for Chi-Square tests.
+ * <p>This interface handles only known distributions. If the distribution is
+ * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
+ * UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public interface ChiSquareTest {
+
+ /**
+ * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
+ * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
+ * frequency counts.
+ * <p>
+ * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
+ * the observed counts follow the expected distribution.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Expected counts must all be positive.
+ * </li>
+ * <li>Observed counts must all be >= 0.
+ * </li>
+ * <li>The observed and expected arrays must have the same length and
+ * their common length must be at least 2.
+ * </li></ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed array of observed frequency counts
+ * @param expected array of expected frequency counts
+ * @return chiSquare statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ */
+ double chiSquare(double[] expected, long[] observed)
+ throws IllegalArgumentException;
+
+ /**
+ * Returns the <i>observed significance level</i>, or <a href=
+ * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
+ * Chi-square goodness of fit test</a> comparing the <code>observed</code>
+ * frequency counts to those in the <code>expected</code> array.
+ * <p>
+ * The number returned is the smallest significance level at which one can reject
+ * the null hypothesis that the observed counts conform to the frequency distribution
+ * described by the expected counts.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Expected counts must all be positive.
+ * </li>
+ * <li>Observed counts must all be >= 0.
+ * </li>
+ * <li>The observed and expected arrays must have the same length and
+ * their common length must be at least 2.
+ * </li></ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed array of observed frequency counts
+ * @param expected array of expected frequency counts
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double chiSquareTest(double[] expected, long[] observed)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
+ * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
+ * conform to the frequency distribution described by the expected counts, with
+ * significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
+ * with 100 * (1 - alpha) percent confidence.
+ * <p>
+ * <strong>Example:</strong><br>
+ * To test the hypothesis that <code>observed</code> follows
+ * <code>expected</code> at the 99% level, use </p><p>
+ * <code>chiSquareTest(expected, observed, 0.01) </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Expected counts must all be positive.
+ * </li>
+ * <li>Observed counts must all be >= 0.
+ * </li>
+ * <li>The observed and expected arrays must have the same length and
+ * their common length must be at least 2.</li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed array of observed frequency counts
+ * @param expected array of expected frequency counts
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean chiSquareTest(double[] expected, long[] observed, double alpha)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Computes the Chi-Square statistic associated with a
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
+ * chi-square test of independence</a> based on the input <code>counts</code>
+ * array, viewed as a two-way table.
+ * <p>
+ * The rows of the 2-way table are
+ * <code>counts[0], ... , counts[counts.length - 1] </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>All counts must be >= 0.
+ * </li>
+ * <li>The count array must be rectangular (i.e. all count[i] subarrays
+ * must have the same length).
+ * </li>
+ * <li>The 2-way table represented by <code>counts</code> must have at
+ * least 2 columns and at least 2 rows.
+ * </li>
+ * </ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param counts array representation of 2-way table
+ * @return chiSquare statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ */
+ double chiSquare(long[][] counts)
+ throws IllegalArgumentException;
+
+ /**
+ * Returns the <i>observed significance level</i>, or <a href=
+ * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
+ * chi-square test of independence</a> based on the input <code>counts</code>
+ * array, viewed as a two-way table.
+ * <p>
+ * The rows of the 2-way table are
+ * <code>counts[0], ... , counts[counts.length - 1] </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>All counts must be >= 0.
+ * </li>
+ * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
+ * </li>
+ * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
+ * at least 2 rows.
+ * </li>
+ * </ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param counts array representation of 2-way table
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double chiSquareTest(long[][] counts)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
+ * chi-square test of independence</a> evaluating the null hypothesis that the classifications
+ * represented by the counts in the columns of the input 2-way table are independent of the rows,
+ * with significance level <code>alpha</code>. Returns true iff the null hypothesis can be rejected
+ * with 100 * (1 - alpha) percent confidence.
+ * <p>
+ * The rows of the 2-way table are
+ * <code>counts[0], ... , counts[counts.length - 1] </code></p>
+ * <p>
+ * <strong>Example:</strong><br>
+ * To test the null hypothesis that the counts in
+ * <code>counts[0], ... , counts[counts.length - 1] </code>
+ * all correspond to the same underlying probability distribution at the 99% level, use </p><p>
+ * <code>chiSquareTest(counts, 0.01) </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>All counts must be >= 0.
+ * </li>
+ * <li>The count array must be rectangular (i.e. all count[i] subarrays must have the same length).
+ * </li>
+ * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
+ * at least 2 rows.
+ * </li>
+ * </ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param counts array representation of 2-way table
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean chiSquareTest(long[][] counts, double alpha)
+ throws IllegalArgumentException, MathException;
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java b/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
new file mode 100644
index 0000000..abb32a5
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
@@ -0,0 +1,424 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.ChiSquaredDistribution;
+import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Implements Chi-Square test statistics defined in the
+ * {@link UnknownDistributionChiSquareTest} interface.
+ *
+ * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
+ */
+public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
+
+ /** Distribution used to compute inference statistics. */
+ private ChiSquaredDistribution distribution;
+
+ /**
+ * Construct a ChiSquareTestImpl backed by a new ChiSquaredDistributionImpl(1.0)
+ */
+ public ChiSquareTestImpl() {
+ this(new ChiSquaredDistributionImpl(1.0));
+ }
+
+ /**
+ * Create a test instance using the given distribution for computing
+ * inference statistics.
+ * @param x distribution used to compute inference statistics.
+ * @since 1.2
+ */
+ public ChiSquareTestImpl(ChiSquaredDistribution x) {
+ super();
+ setDistribution(x);
+ }
+ /**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array when its total differs from the observed total
+ * by more than 10E-6, so that the two sums are equal.</p>
+ *
+ * @param expected array of expected frequency counts
+ * @param observed array of observed frequency counts
+ * @return chi-square test statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ * or length is less than 2
+ */
+ public double chiSquare(double[] expected, long[] observed)
+ throws IllegalArgumentException {
+ if (expected.length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, expected.length, 2);
+ }
+ if (expected.length != observed.length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, expected.length, observed.length);
+ }
+ checkPositive(expected); // every expected count is used as a divisor below
+ checkNonNegative(observed);
+ double sumExpected = 0d;
+ double sumObserved = 0d;
+ for (int i = 0; i < observed.length; i++) {
+ sumExpected += expected[i];
+ sumObserved += observed[i];
+ }
+ double ratio = 1.0d;
+ boolean rescale = false;
+ if (FastMath.abs(sumExpected - sumObserved) > 10E-6) { // 10E-6 is 1.0e-5: smaller gaps between the totals are ignored
+ ratio = sumObserved / sumExpected; // factor that scales the expected total onto the observed total
+ rescale = true;
+ }
+ double sumSq = 0.0d;
+ for (int i = 0; i < observed.length; i++) {
+ if (rescale) {
+ final double dev = observed[i] - ratio * expected[i];
+ sumSq += dev * dev / (ratio * expected[i]); // (observed - rescaled expected)^2 / rescaled expected
+ } else {
+ final double dev = observed[i] - expected[i];
+ sumSq += dev * dev / expected[i];
+ }
+ }
+ return sumSq;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array if necessary to ensure that the sum of the
+ * expected and observed counts are equal. The input is validated before the
+ * degrees of freedom of this instance's distribution are changed.</p>
+ * @param expected array of expected frequency counts
+ * @param observed array of observed frequency counts
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double chiSquareTest(double[] expected, long[] observed)
+ throws IllegalArgumentException, MathException {
+ final double statistic = chiSquare(expected, observed); // validates the arrays first; invalid input leaves the distribution untouched
+ distribution.setDegreesOfFreedom(expected.length - 1.0);
+ return 1.0 - distribution.cumulativeProbability(statistic);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array if necessary to ensure that the sum of the
+ * expected and observed counts are equal.</p>
+ *
+ * @param expected array of expected frequency counts
+ * @param observed array of observed frequency counts
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met (a NaN alpha is rejected too)
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean chiSquareTest(double[] expected, long[] observed,
+ double alpha) throws IllegalArgumentException, MathException {
+ if (!((alpha > 0) && (alpha <= 0.5))) { // written positively so that NaN fails the check
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0.0, 0.5);
+ }
+ return chiSquareTest(expected, observed) < alpha;
+ }
+
+ /** Computes the chi-square statistic of the 2-way table <code>counts</code>.
+ * @param counts array representation of 2-way table
+ * @return chi-square test statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ */
+ public double chiSquare(long[][] counts) throws IllegalArgumentException {
+
+ checkArray(counts);
+ final int rowCount = counts.length;
+ final int colCount = counts[0].length;
+
+ // accumulate the marginal totals and the grand total in one pass
+ final double[] rowTotals = new double[rowCount];
+ final double[] colTotals = new double[colCount];
+ double grandTotal = 0.0d;
+ for (int r = 0; r < rowCount; r++) {
+ for (int c = 0; c < colCount; c++) {
+ final long cell = counts[r][c];
+ rowTotals[r] += cell;
+ colTotals[c] += cell;
+ grandTotal += cell;
+ }
+ }
+
+ // sum (observed - expected)^2 / expected over every cell
+ double statistic = 0.0d;
+ for (int r = 0; r < rowCount; r++) {
+ for (int c = 0; c < colCount; c++) {
+ final double expected = (rowTotals[r] * colTotals[c]) / grandTotal;
+ final double residual = counts[r][c] - expected;
+ statistic += (residual * residual) / expected;
+ }
+ }
+ return statistic;
+ }
+
+ /**
+ * @param counts array representation of 2-way table
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double chiSquareTest(long[][] counts)
+ throws IllegalArgumentException, MathException {
+ checkArray(counts);
+ final double degreesOfFreedom = (counts.length - 1.0) * (counts[0].length - 1.0);
+ distribution.setDegreesOfFreedom(degreesOfFreedom);
+ return 1.0 - distribution.cumulativeProbability(chiSquare(counts));
+ }
+
+ /**
+ * @param counts array representation of 2-way table
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met (a NaN alpha is rejected too)
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean chiSquareTest(long[][] counts, double alpha)
+ throws IllegalArgumentException, MathException {
+ if (!((alpha > 0) && (alpha <= 0.5))) { // written positively so that NaN fails the check
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0.0, 0.5);
+ }
+ return chiSquareTest(counts) < alpha;
+ }
+
+ /** Computes a chi-square statistic comparing two arrays of observed counts.
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @return chi-square test statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ * @since 1.2
+ */
+ public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException {
+
+ // Require at least two entries and equal-length arrays
+ if (observed1.length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, observed1.length, 2);
+ }
+ if (observed1.length != observed2.length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
+ observed1.length, observed2.length);
+ }
+
+ // Ensure non-negative counts
+ checkNonNegative(observed1);
+ checkNonNegative(observed2);
+
+ // Total the counts of each data set
+ long countSum1 = 0;
+ long countSum2 = 0;
+ boolean unequalCounts = false;
+ double weight = 0.0; // only assigned and read when the two totals differ
+ for (int i = 0; i < observed1.length; i++) {
+ countSum1 += observed1[i];
+ countSum2 += observed2[i];
+ }
+ // Ensure neither sample is uniformly 0
+ if (countSum1 == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
+ }
+ if (countSum2 == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
+ }
+ // Compare and compute weight only if different
+ unequalCounts = countSum1 != countSum2;
+ if (unequalCounts) {
+ weight = FastMath.sqrt((double) countSum1 / (double) countSum2); // sqrt(total1 / total2)
+ }
+ // Compute ChiSquare statistic
+ double sumSq = 0.0d;
+ double dev = 0.0d;
+ double obs1 = 0.0d;
+ double obs2 = 0.0d;
+ for (int i = 0; i < observed1.length; i++) {
+ if (observed1[i] == 0 && observed2[i] == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
+ } else {
+ obs1 = observed1[i];
+ obs2 = observed2[i];
+ if (unequalCounts) { // apply weights
+ dev = obs1/weight - obs2 * weight;
+ } else {
+ dev = obs1 - obs2;
+ }
+ sumSq += (dev * dev) / (obs1 + obs2); // divisor is non-zero: entries that are zero in both arrays were rejected above
+ }
+ }
+ return sumSq;
+ }
+
+ /** Returns the p-value of the two-sample comparison; input is validated before the distribution is modified.
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ * @since 1.2
+ */
+ public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException, MathException {
+ final double statistic = chiSquareDataSetsComparison(observed1, observed2); // validates the arrays first
+ distribution.setDegreesOfFreedom((double) observed1.length - 1);
+ return 1 - distribution.cumulativeProbability(statistic);
+ }
+
+ /**
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met (a NaN alpha is rejected too)
+ * @throws MathException if an error occurs performing the test
+ * @since 1.2
+ */
+ public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
+ double alpha) throws IllegalArgumentException, MathException {
+ if (!((alpha > 0) && (alpha <= 0.5))) { // written positively so that NaN fails the check
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0.0, 0.5);
+ }
+ return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
+ }
+
+ /**
+ * Checks to make sure that the input long[][] array is rectangular,
+ * has at least 2 rows and 2 columns, and has all non-negative entries,
+ * throwing IllegalArgumentException if any of these checks fail.
+ *
+ * @param in input 2-way table to check
+ * @throws IllegalArgumentException if the array is not valid
+ */
+ private void checkArray(long[][] in) throws IllegalArgumentException {
+
+ if (in.length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
+ }
+
+ if (in[0].length < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
+ }
+
+ checkRectangular(in);
+ checkNonNegative(in);
+
+ }
+
+ //--------------------- Private array methods -- should find a utility home for these
+
+ /**
+ * Throws IllegalArgumentException if the input array is not rectangular.
+ *
+ * @param in array to be tested
+ * @throws NullPointerException if input array is null
+ * @throws IllegalArgumentException if input array is not rectangular
+ */
+ private void checkRectangular(long[][] in) {
+ for (int row = 1; row < in.length; row++) {
+ if (in[row].length == in[0].length) {
+ continue; // this row matches the first row
+ }
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIFFERENT_ROWS_LENGTHS, in[row].length, in[0].length);
+ }
+ }
+
+ /**
+ * Check all entries of the input array are > 0; a NaN entry is rejected as well.
+ *
+ * @param in array to be tested
+ * @exception IllegalArgumentException if one entry is not positive
+ */
+ private void checkPositive(double[] in) throws IllegalArgumentException {
+ for (int i = 0; i < in.length; i++) {
+ if (!(in[i] > 0)) { // negated form: NaN compares false with everything and must not pass
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
+ i, in[i]);
+ }
+ }
+ }
+
+ /**
+ * Check all entries of the input array are >= 0.
+ *
+ * @param in array to be tested
+ * @exception IllegalArgumentException if one entry is negative
+ */
+ private void checkNonNegative(long[] in) throws IllegalArgumentException {
+ for (int i = 0; i < in.length; i++) {
+ if (in[i] < 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
+ i, in[i]);
+ }
+ }
+ }
+
+ /**
+ * Check all entries of the input array are >= 0.
+ *
+ * @param in array to be tested
+ * @exception IllegalArgumentException if one entry is negative
+ */
+ private void checkNonNegative(long[][] in) throws IllegalArgumentException {
+ for (int i = 0; i < in.length; i ++) {
+ for (int j = 0; j < in[i].length; j++) {
+ if (in[i][j] < 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
+ i, j, in[i][j]);
+ }
+ }
+ }
+ }
+
+ /**
+ * Modify the distribution used to compute inference statistics.
+ * The p-value methods of this class set its degrees of freedom on each call.
+ * @param value
+ * the new distribution
+ * @since 1.2
+ */
+ public void setDistribution(ChiSquaredDistribution value) {
+ distribution = value;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/OneWayAnova.java b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnova.java
new file mode 100644
index 0000000..a2cde47
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnova.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import java.util.Collection;
+
+/**
+ * An interface for one-way ANOVA (analysis of variance).
+ *
+ * <p> Tests for differences between two or more categories of univariate data
+ * (for example, the body mass index of accountants, lawyers, doctors and
+ * computer programmers). When two categories are given, this is equivalent to
+ * the {@link org.apache.commons.math.stat.inference.TTest}.
+ * </p>
+ *
+ * @since 1.2
+ * @version $Revision: 811786 $ $Date: 2009-09-06 11:36:08 +0200 (dim. 06 sept. 2009) $
+ */
+public interface OneWayAnova {
+
+ /**
+ * Computes the ANOVA F-value for a collection of <code>double[]</code>
+ * arrays.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return Fvalue
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed due to a
+ * convergence or other numerical error.
+ */
+ double anovaFValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Computes the ANOVA P-value for a collection of <code>double[]</code>
+ * arrays.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return Pvalue
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed due to a
+ * convergence or other numerical error.
+ */
+ double anovaPValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Performs an ANOVA test, evaluating the null hypothesis that there
+ * is no difference among the means of the data categories.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li>
+ * <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
+ * </li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed due to a
+ * convergence or other numerical error.
+ */
+ boolean anovaTest(Collection<double[]> categoryData, double alpha)
+ throws IllegalArgumentException, MathException;
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
new file mode 100644
index 0000000..a47d0cf
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import java.util.Collection;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.FDistribution;
+import org.apache.commons.math.distribution.FDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+
+
+/**
+ * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
+ * interface.
+ *
+ * <p>Uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate exact p-values.</p>
+ *
+ * <p>This implementation is based on a description at
+ * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
+ * <pre>
+ * Abbreviations: bg = between groups,
+ * wg = within groups,
+ * ss = sum squared deviations
+ * </pre>
+ *
+ * @since 1.2
+ * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
+ */
+public class OneWayAnovaImpl implements OneWayAnova {
+
+ /**
+ * Default constructor.
+ */
+ public OneWayAnovaImpl() {
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation computes the F statistic using the definitional
+ * formula<pre>
+ * F = msbg/mswg</pre>
+ * where<pre>
+ * msbg = between group mean square
+ * mswg = within group mean square</pre>
+ * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
+ * here</a></p>
+ */
+ public double anovaFValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ return a.F;
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and <code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ */
+ public double anovaPValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
+ return 1.0 - fdist.cumulativeProbability(a.F);
+ }
+
+ /**
+ * {@inheritDoc}<p>
+ * This implementation uses the
+ * {@link org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and <code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ * <p>True is returned iff the estimated p-value is less than alpha.</p>
+ */
+ public boolean anovaTest(Collection<double[]> categoryData, double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0, 0.5);
+ }
+ return anovaPValue(categoryData) < alpha;
+ }
+
+
+ /**
+ * This method actually does the calculations (except P-value).
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return computed AnovaStats
+ * @throws IllegalArgumentException if categoryData does not meet
+ * preconditions specified in the interface definition
+ * @throws MathException if an error occurs computing the Anova stats
+ */
+ private AnovaStats anovaStats(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+
+ // check if we have enough categories
+ if (categoryData.size() < 2) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
+ categoryData.size());
+ }
+
+ // check if each category has enough data and all is double[]
+ for (double[] array : categoryData) {
+ if (array.length <= 1) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
+ array.length);
+ }
+ }
+
+ int dfwg = 0;
+ double sswg = 0;
+ Sum totsum = new Sum();
+ SumOfSquares totsumsq = new SumOfSquares();
+ int totnum = 0;
+
+ for (double[] data : categoryData) {
+
+ Sum sum = new Sum();
+ SumOfSquares sumsq = new SumOfSquares();
+ int num = 0;
+
+ for (int i = 0; i < data.length; i++) {
+ double val = data[i];
+
+ // within category
+ num++;
+ sum.increment(val);
+ sumsq.increment(val);
+
+ // for all categories
+ totnum++;
+ totsum.increment(val);
+ totsumsq.increment(val);
+ }
+ dfwg += num - 1;
+ double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
+ sswg += ss;
+ }
+ double sst = totsumsq.getResult() - totsum.getResult() *
+ totsum.getResult()/totnum;
+ double ssbg = sst - sswg;
+ int dfbg = categoryData.size() - 1;
+ double msbg = ssbg/dfbg;
+ double mswg = sswg/dfwg;
+ double F = msbg/mswg;
+
+ return new AnovaStats(dfbg, dfwg, F);
+ }
+
+ /**
+ Convenience class to pass dfbg,dfwg,F values around within OneWayAnovaImpl.
+ No get/set methods provided.
+ */
+ private static class AnovaStats {
+
+ /** Degrees of freedom in numerator (between groups). */
+ private int dfbg;
+
+ /** Degrees of freedom in denominator (within groups). */
+ private int dfwg;
+
+ /** Statistic. */
+ private double F;
+
+ /**
+ * Constructor
+ * @param dfbg degrees of freedom in numerator (between groups)
+ * @param dfwg degrees of freedom in denominator (within groups)
+ * @param F statistic
+ */
+ private AnovaStats(int dfbg, int dfwg, double F) {
+ this.dfbg = dfbg;
+ this.dfwg = dfwg;
+ this.F = F;
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/TTest.java b/src/main/java/org/apache/commons/math/stat/inference/TTest.java
new file mode 100644
index 0000000..0ccb0c0
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/TTest.java
@@ -0,0 +1,771 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.stat.descriptive.StatisticalSummary;
+
+/**
+ * An interface for Student's t-tests.
+ * <p>
+ * Tests can be:<ul>
+ * <li>One-sample or two-sample</li>
+ * <li>One-sided or two-sided</li>
+ * <li>Paired or unpaired (for two-sample tests)</li>
+ * <li>Homoscedastic (equal variance assumption) or heteroscedastic
+ * (for two sample tests)</li>
+ * <li>Fixed significance level (boolean-valued) or returning p-values.
+ * </li></ul></p>
+ * <p>
+ * Test statistics are available for all tests. Methods including "Test" in
+ * their names perform tests; all other methods return t-statistics. Among
+ * the "Test" methods, <code>double-</code>valued methods return p-values;
+ * <code>boolean-</code>valued methods perform fixed significance level tests.
+ * Significance levels are always specified as numbers between 0 and 0.5
+ * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
+ * <p>
+ * Input to tests can be either <code>double[]</code> arrays or
+ * {@link StatisticalSummary} instances.</p>
+ *
+ *
+ * @version $Revision: 811786 $ $Date: 2009-09-06 11:36:08 +0200 (dim. 06 sept. 2009) $
+ */
+public interface TTest {
+ /**
+ * Computes a paired, 2-sample t-statistic based on the data in the input
+ * arrays. The t-statistic returned is equivalent to what would be returned by
+ * computing the one-sample t-statistic {@link #t(double, double[])}, with
+ * <code>mu = 0</code> and the sample array consisting of the (signed)
+ * differences between corresponding entries in <code>sample1</code> and
+ * <code>sample2.</code>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input arrays must have the same length and their common length
+ * must be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if the statistic can not be computed due to a
+ * convergence or other numerical error.
+ */
+ double pairedT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
+ * based on the data in the input arrays.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean of the paired
+ * differences is 0 in favor of the two-sided alternative that the mean paired
+ * difference is not equal to 0. For a one-sided test, divide the returned
+ * value by 2.</p>
+ * <p>
+ * This test is equivalent to a one-sample t-test computed using
+ * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
+ * array consisting of the signed differences between corresponding elements of
+ * <code>sample1</code> and <code>sample2.</code></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length must
+ * be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double pairedTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a paired t-test evaluating the null hypothesis that the
+ * mean of the paired differences between <code>sample1</code> and
+ * <code>sample2</code> is 0 in favor of the two-sided alternative that the
+ * mean paired difference is not equal to 0, with significance level
+ * <code>alpha</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * <code>alpha * 2</code></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length
+ * must be at least 2.
+ * </li>
+ * <li> <code> 0 < alpha < 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean pairedTTest(
+ double[] sample1,
+ double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * t statistic </a> given observed values and a comparison constant.
+ * <p>
+ * This statistic can be used to perform a one sample t-test for the mean.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu comparison constant
+ * @param observed array of values
+ * @return t statistic
+ * @throws IllegalArgumentException if input array length is less than 2
+ */
+ double t(double mu, double[] observed)
+ throws IllegalArgumentException;
+ /**
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * t statistic </a> to use in comparing the mean of the dataset described by
+ * <code>sampleStats</code> to <code>mu</code>.
+ * <p>
+ * This statistic can be used to perform a one sample t-test for the mean.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li><code>sampleStats.getN() &gt;= 2</code>.
+ * </li></ul></p>
+ *
+ * @param mu comparison constant
+ * @param sampleStats StatisticalSummary holding sample summary statistics
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double t(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException;
+ /**
+ * Computes a 2-sample t statistic, under the hypothesis of equal
+ * subpopulation variances. To compute a t-statistic without the
+ * equal variances hypothesis, use {@link #t(double[], double[])}.
+ * <p>
+ * This statistic can be used to perform a (homoscedastic) two-sample
+ * t-test to compare sample means.</p>
+ * <p>
+ * The t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
+ * <strong><code> m2</code></strong> is the mean of second sample
+ * and <strong><code>var</code></strong> is the pooled variance estimate:
+ * </p><p>
+ * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
+ * </p><p>
+ * with <strong><code>var1</code></strong> the variance of the first sample and
+ * <strong><code>var2</code></strong> the variance of the second sample.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double homoscedasticT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException;
+ /**
+ * Computes a 2-sample t statistic, without the hypothesis of equal
+ * subpopulation variances. To compute a t-statistic assuming equal
+ * variances, use {@link #homoscedasticT(double[], double[])}.
+ * <p>
+ * This statistic can be used to perform a two-sample t-test to compare
+ * sample means.</p>
+ * <p>
+ * The t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of the first sample
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> m2</code></strong> is the mean of the second sample;
+ * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample;
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double t(double[] sample1, double[] sample2)
+ throws IllegalArgumentException;
+ /**
+ * Computes a 2-sample t statistic, comparing the means of the datasets
+ * described by two {@link StatisticalSummary} instances, without the
+ * assumption of equal subpopulation variances. Use
+ * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
+ * compute a t-statistic under the equal variances assumption.
+ * <p>
+ * This statistic can be used to perform a two-sample t-test to compare
+ * sample means.</p>
+ * <p>
+ * The returned t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> m2</code></strong> is the mean of the second sample
+ * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two Univariates must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double t(
+ StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException;
+ /**
+ * Computes a 2-sample t statistic, comparing the means of the datasets
+ * described by two {@link StatisticalSummary} instances, under the
+ * assumption of equal subpopulation variances. To compute a t-statistic
+ * without the equal variances assumption, use
+ * {@link #t(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * This statistic can be used to perform a (homoscedastic) two-sample
+ * t-test to compare sample means.</p>
+ * <p>
+ * The t-statistic returned is</p>
+ * <p>
+ * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
+ * <strong><code> m2</code></strong> is the mean of second sample
+ * and <strong><code>var</code></strong> is the pooled variance estimate:
+ * </p><p>
+ * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
+ * </p><p>
+ * with <strong><code>var1</code></strong> the variance of the first sample and
+ * <strong><code>var2</code></strong> the variance of the second sample.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two Univariates must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double homoscedasticT(
+ StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * comparing the mean of the input array with the constant <code>mu</code>.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean equals
+ * <code>mu</code> in favor of the two-sided alternative that the mean
+ * is different from <code>mu</code>. For a one-sided test, divide the
+ * returned value by 2.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double tTest(double mu, double[] sample)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
+ * which <code>sample</code> is drawn equals <code>mu</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be
+ * rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
+ * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
+ * <br><code>tTest(mu, sample, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ boolean tTest(double mu, double[] sample, double alpha)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * comparing the mean of the dataset described by <code>sampleStats</code>
+ * with the constant <code>mu</code>.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean equals
+ * <code>mu</code> in favor of the two-sided alternative that the mean
+ * is different from <code>mu</code>. For a one-sided test, divide the
+ * returned value by 2.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The sample must contain at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double tTest(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that the mean of the
+ * population from which the dataset described by <code>sampleStats</code> is
+ * drawn equals <code>mu</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * <code>alpha * 2.</code></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
+ * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
+ * <br><code>tTest(mu, sampleStats, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The sample must include at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ boolean tTest(
+ double mu,
+ StatisticalSummary sampleStats,
+ double alpha)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the input arrays.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data to compute the p-value. The t-statistic used is as defined in
+ * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
+ * to the degrees of freedom is used,
+ * as described
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * here.</a> To perform the test under the assumption of equal subpopulation
+ * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double tTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the input arrays, under the assumption that
+ * the two samples are drawn from subpopulations with equal variances.
+ * To perform the test without the equal variances assumption, use
+ * {@link #tTest(double[], double[])}.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * A pooled variance estimate is used to compute the t-statistic. See
+ * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
+ * minus 2 is used as the degrees of freedom.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double homoscedasticTTest(
+ double[] sample1,
+ double[] sample2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
+ * with significance level <code>alpha</code>. This test does not assume
+ * that the subpopulation variances are equal. To perform the test assuming
+ * equal variances, use
+ * {@link #homoscedasticTTest(double[], double[], double)}.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code>.</p>
+ * <p>
+ * See {@link #t(double[], double[])} for the formula used to compute the
+ * t-statistic. Degrees of freedom are approximated using the
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * Welch-Satterthwaite approximation.</a></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use
+ * <br><code>tTest(sample1, sample2, 0.05). </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>,
+ * at the 99% level, first verify that the measured mean of <code>sample 1</code>
+ * is less than the mean of <code>sample 2</code> and then use
+ * <br><code>tTest(sample1, sample2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean tTest(
+ double[] sample1,
+ double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
+ * with significance level <code>alpha</code>, assuming that the
+ * subpopulation variances are equal. Use
+ * {@link #tTest(double[], double[], double)} to perform the test without
+ * the assumption of equal variances.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
+ * without the assumption of equal subpopulation variances, use
+ * {@link #tTest(double[], double[], double)}.</p>
+ * <p>
+ * A pooled variance estimate is used to compute the t-statistic. See
+ * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the
+ * sample sizes minus 2 is used as the degrees of freedom.</p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
+ * at the 99% level, first verify that the measured mean of
+ * <code>sample 1</code> is less than the mean of <code>sample 2</code>
+ * and then use
+ * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean homoscedasticTTest(
+ double[] sample1,
+ double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the datasets described by two StatisticalSummary
+ * instances.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data to compute the p-value. To perform the test assuming
+ * equal variances, use
+ * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must
+ * each contain at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double tTest(
+ StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the datasets described by two StatisticalSummary
+ * instances, under the hypothesis of equal subpopulation variances. To
+ * perform a test without the equal variances assumption, use
+ * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * See {@link #homoscedasticT(double[], double[])} for the formula used to
+ * compute the t-statistic. The sum of the sample sizes minus 2 is used as
+ * the degrees of freedom.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must
+ * each contain at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double homoscedasticTTest(
+ StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException;
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that
+ * <code>sampleStats1</code> and <code>sampleStats2</code> describe
+ * datasets drawn from populations with the same mean, with significance
+ * level <code>alpha</code>. This test does not assume that the
+ * subpopulation variances are equal. To perform the test under the equal
+ * variances assumption, use
+ * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code>.</p>
+ * <p>
+ * See {@link #t(double[], double[])} for the formula used to compute the
+ * t-statistic. Degrees of freedom are approximated using the
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * Welch-Satterthwaite approximation.</a></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use
+ * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
+ * at the 99% level, first verify that the measured mean of
+ * <code>sample 1</code> is less than the mean of <code>sample 2</code>
+ * and then use
+ * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must
+ * each contain at least 2 observations.
+ * </li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing sample data values
+ * @param sampleStats2 StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean tTest(
+ StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2,
+ double alpha)
+ throws IllegalArgumentException, MathException;
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java b/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java
new file mode 100644
index 0000000..d4d1a12
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java
@@ -0,0 +1,1069 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.TDistribution;
+import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.stat.StatUtils;
+import org.apache.commons.math.stat.descriptive.StatisticalSummary;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Implements t-test statistics defined in the {@link TTest} interface.
+ * <p>
+ * Uses commons-math {@link org.apache.commons.math.distribution.TDistributionImpl}
+ * implementation to estimate exact p-values.</p>
+ *
+ * @version $Revision: 1042336 $ $Date: 2010-12-05 13:40:48 +0100 (dim. 05 déc. 2010) $
+ */
+public class TTestImpl implements TTest {
+
+ /** Distribution used to compute inference statistics.
+ * @deprecated in 2.2 (to be removed in 3.0).
+ */
+ @Deprecated
+ private TDistribution distribution;
+
+ /**
+ * Default constructor.
+ */
+ public TTestImpl() {
+ // Delegate to the (deprecated) distribution-taking constructor, handing it
+ // a TDistributionImpl built from 1.0 -- presumably the degrees of freedom;
+ // TODO confirm against TDistributionImpl's constructor.
+ this(new TDistributionImpl(1.0));
+ }
+
+ /**
+ * Create a test instance using the given distribution for computing
+ * inference statistics.
+ * @param t distribution used to compute inference statistics.
+ * @since 1.2
+ * @deprecated in 2.2 (to be removed in 3.0).
+ */
+ @Deprecated
+ public TTestImpl(TDistribution t) {
+ // The superclass constructor runs implicitly; all that remains is to
+ // hand the supplied distribution to setDistribution.
+ setDistribution(t);
+ }
+
+ /**
+ * Computes a paired, 2-sample t-statistic based on the data in the input
+ * arrays. The t-statistic returned is equivalent to what would be returned by
+ * computing the one-sample t-statistic {@link #t(double, double[])}, with
+ * <code>mu = 0</code> and the sample array consisting of the (signed)
+ * differences between corresponding entries in <code>sample1</code> and
+ * <code>sample2.</code>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input arrays must have the same length and their common length
+ * must be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if the statistic can not be computed due to a
+ * convergence or other numerical error.
+ */
+ public double pairedT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ // Run the sample checks on each array before touching the data.
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ // Mean and variance of the pairwise differences between the two samples.
+ final double diffMean = StatUtils.meanDifference(sample1, sample2);
+ final double diffVariance =
+ StatUtils.varianceDifference(sample1, sample2, diffMean);
+ // One-sample statistic on the differences, compared against 0.
+ return t(diffMean, 0, diffVariance, sample1.length);
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
+ * based on the data in the input arrays.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean of the paired
+ * differences is 0 in favor of the two-sided alternative that the mean paired
+ * difference is not equal to 0. For a one-sided test, divide the returned
+ * value by 2.</p>
+ * <p>
+ * This test is equivalent to a one-sample t-test computed using
+ * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
+ * array consisting of the signed differences between corresponding elements of
+ * <code>sample1</code> and <code>sample2.</code></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length must
+ * be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double pairedTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ // Validate both arrays up front, exactly as pairedT does, so that the two
+ // paired entry points treat unusable input the same way instead of
+ // leaving this one to whatever the StatUtils helpers happen to do.
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ final double meanDifference = StatUtils.meanDifference(sample1, sample2);
+ final double varianceDifference =
+ StatUtils.varianceDifference(sample1, sample2, meanDifference);
+ // Equivalent to a one-sample test of the paired differences against mu = 0.
+ return tTest(meanDifference, 0, varianceDifference, sample1.length);
+ }
+
+ /**
+ * Performs a paired t-test evaluating the null hypothesis that the
+ * mean of the paired differences between <code>sample1</code> and
+ * <code>sample2</code> is 0 in favor of the two-sided alternative that the
+ * mean paired difference is not equal to 0, with significance level
+ * <code>alpha</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * <code>alpha * 2</code></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length
+ * must be at least 2.
+ * </li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
+ throws IllegalArgumentException, MathException {
+ // Check alpha first, before any statistics are computed.
+ checkSignificanceLevel(alpha);
+ final double pValue = pairedTTest(sample1, sample2);
+ // Reject the null hypothesis only when the p-value is strictly below alpha.
+ final boolean rejectNull = pValue < alpha;
+ return rejectNull;
+ }
+
+ /**
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * t statistic </a> given observed values and a comparison constant.
+ * <p>
+ * This statistic can be used to perform a one sample t-test for the mean.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu comparison constant
+ * @param observed array of values
+ * @return t statistic
+ * @throws IllegalArgumentException if input array length is less than 2
+ */
+ public double t(double mu, double[] observed)
+ throws IllegalArgumentException {
+ // Run the sample check before computing any summary statistics.
+ checkSampleData(observed);
+ final double sampleMean = StatUtils.mean(observed);
+ final double sampleVariance = StatUtils.variance(observed);
+ // Delegate to the summary-value overload: (mean, mu, variance, n).
+ return t(sampleMean, mu, sampleVariance, observed.length);
+ }
+
+ /**
+ * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * t statistic </a> to use in comparing the mean of the dataset described by
+ * <code>sampleStats</code> to <code>mu</code>.
+ * <p>
+ * This statistic can be used to perform a one sample t-test for the mean.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li><code>sampleStats.getN() &gt;= 2</code>.
+ * </li></ul></p>
+ *
+ * @param mu comparison constant
+ * @param sampleStats StatisticalSummary holding sample summary statistics
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double t(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException {
+ // Run the summary check before reading any of its values.
+ checkSampleData(sampleStats);
+ final double sampleMean = sampleStats.getMean();
+ final double sampleVariance = sampleStats.getVariance();
+ // Delegate to the summary-value overload: (mean, mu, variance, n).
+ return t(sampleMean, mu, sampleVariance, sampleStats.getN());
+ }
+
+ /**
+ * Computes a 2-sample t statistic, under the hypothesis of equal
+ * subpopulation variances. To compute a t-statistic without the
+ * equal variances hypothesis, use {@link #t(double[], double[])}.
+ * <p>
+ * This statistic can be used to perform a (homoscedastic) two-sample
+ * t-test to compare sample means.</p>
+ * <p>
+ * The t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
+ * <strong><code> m2</code></strong> is the mean of second sample
+ * and <strong><code>var</code></strong> is the pooled variance estimate:
+ * </p><p>
+ * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
+ * </p><p>
+ * with <strong><code>var1</code></strong> the variance of the first sample and
+ * <strong><code>var2</code></strong> the variance of the second sample.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double homoscedasticT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException {
+ // Run the sample checks on each array before computing statistics.
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ final double mean1 = StatUtils.mean(sample1);
+ final double mean2 = StatUtils.mean(sample2);
+ final double variance1 = StatUtils.variance(sample1);
+ final double variance2 = StatUtils.variance(sample2);
+ // Delegate to the summary-value overload: (m1, m2, v1, v2, n1, n2).
+ return homoscedasticT(mean1, mean2, variance1, variance2,
+ sample1.length, sample2.length);
+ }
+
+ /**
+ * Computes a 2-sample t statistic, without the hypothesis of equal
+ * subpopulation variances. To compute a t-statistic assuming equal
+ * variances, use {@link #homoscedasticT(double[], double[])}.
+ * <p>
+ * This statistic can be used to perform a two-sample t-test to compare
+ * sample means.</p>
+ * <p>
+ * The t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> m2</code></strong> is the mean of the second sample;
+ * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample;
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double t(double[] sample1, double[] sample2)
+ throws IllegalArgumentException {
+ // Run the sample checks on each array before computing statistics.
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ final double mean1 = StatUtils.mean(sample1);
+ final double mean2 = StatUtils.mean(sample2);
+ final double variance1 = StatUtils.variance(sample1);
+ final double variance2 = StatUtils.variance(sample2);
+ // Delegate to the summary-value overload: (m1, m2, v1, v2, n1, n2).
+ return t(mean1, mean2, variance1, variance2,
+ sample1.length, sample2.length);
+ }
+
+ /**
+ * Computes a 2-sample t statistic, comparing the means of the datasets
+ * described by two {@link StatisticalSummary} instances, without the
+ * assumption of equal subpopulation variances. Use
+ * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
+ * compute a t-statistic under the equal variances assumption.
+ * <p>
+ * This statistic can be used to perform a two-sample t-test to compare
+ * sample means.</p>
+ * <p>
+ * The returned t-statistic is</p>
+ * <p>
+ * &nbsp;&nbsp; <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of the first sample;
+ * <strong><code> n2</code></strong> is the size of the second sample;
+ * <strong><code> m1</code></strong> is the mean of the first sample;
+ * <strong><code> m2</code></strong> is the mean of the second sample
+ * <strong><code> var1</code></strong> is the variance of the first sample;
+ * <strong><code> var2</code></strong> is the variance of the second sample
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double t(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException {
+ // Run the summary checks before reading any values.
+ checkSampleData(sampleStats1);
+ checkSampleData(sampleStats2);
+ final double mean1 = sampleStats1.getMean();
+ final double mean2 = sampleStats2.getMean();
+ final double variance1 = sampleStats1.getVariance();
+ final double variance2 = sampleStats2.getVariance();
+ // Delegate to the summary-value overload: (m1, m2, v1, v2, n1, n2).
+ return t(mean1, mean2, variance1, variance2,
+ sampleStats1.getN(), sampleStats2.getN());
+ }
+
+ /**
+ * Computes a 2-sample t statistic, comparing the means of the datasets
+ * described by two {@link StatisticalSummary} instances, under the
+ * assumption of equal subpopulation variances. To compute a t-statistic
+ * without the equal variances assumption, use
+ * {@link #t(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * This statistic can be used to perform a (homoscedastic) two-sample
+ * t-test to compare sample means.</p>
+ * <p>
+ * The t-statistic returned is</p>
+ * <p>
+ * &nbsp;&nbsp;<code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
+ * </p><p>
+ * where <strong><code>n1</code></strong> is the size of first sample;
+ * <strong><code> n2</code></strong> is the size of second sample;
+ * <strong><code> m1</code></strong> is the mean of first sample;
+ * <strong><code> m2</code></strong> is the mean of second sample
+ * and <strong><code>var</code></strong> is the pooled variance estimate:
+ * </p><p>
+ * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
+ * </p><p>
+ * with <strong><code>var1</code></strong> the variance of the first sample and
+ * <strong><code>var2</code></strong> the variance of the second sample.
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double homoscedasticT(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException {
+ // Run the summary checks before reading any values.
+ checkSampleData(sampleStats1);
+ checkSampleData(sampleStats2);
+ final double mean1 = sampleStats1.getMean();
+ final double mean2 = sampleStats2.getMean();
+ final double variance1 = sampleStats1.getVariance();
+ final double variance2 = sampleStats2.getVariance();
+ // Delegate to the summary-value overload: (m1, m2, v1, v2, n1, n2).
+ return homoscedasticT(mean1, mean2, variance1, variance2,
+ sampleStats1.getN(), sampleStats2.getN());
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * comparing the mean of the input array with the constant <code>mu</code>.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean equals
+ * <code>mu</code> in favor of the two-sided alternative that the mean
+ * is different from <code>mu</code>. For a one-sided test, divide the
+ * returned value by 2.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double tTest(double mu, double[] sample)
+ throws IllegalArgumentException, MathException {
+ // Run the sample check before computing any summary statistics.
+ checkSampleData(sample);
+ final double sampleMean = StatUtils.mean(sample);
+ final double sampleVariance = StatUtils.variance(sample);
+ // Delegate to the summary-value overload: (mean, mu, variance, n).
+ return tTest(sampleMean, mu, sampleVariance, sample.length);
+ }
+
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
+ * which <code>sample</code> is drawn equals <code>mu</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be
+ * rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code>
+ * </p><p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
+ * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
+ * <br><code>tTest(mu, sample, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array length must be at least 2.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with confidence 1 - alpha
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public boolean tTest(double mu, double[] sample, double alpha)
+ throws IllegalArgumentException, MathException {
+ checkSignificanceLevel(alpha);
+ return tTest(mu, sample) < alpha;
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a one-sample, two-tailed t-test
+ * comparing the mean of the dataset described by <code>sampleStats</code>
+ * with the constant <code>mu</code>.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean equals
+ * <code>mu</code> in favor of the two-sided alternative that the mean
+ * is different from <code>mu</code>. For a one-sided test, divide the
+ * returned value by 2.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The sample must contain at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double tTest(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException, MathException {
+ checkSampleData(sampleStats);
+ return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
+ sampleStats.getN());
+ }
+
+ /**
+ * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that the mean of the
+ * population from which the dataset described by <code>stats</code> is
+ * drawn equals <code>mu</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * <code>alpha * 2.</code></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
+ * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
+ * <br><code>tTest(mu, sampleStats, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The sample must include at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with confidence 1 - alpha
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public boolean tTest( double mu, StatisticalSummary sampleStats,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ checkSignificanceLevel(alpha);
+ return tTest(mu, sampleStats) < alpha;
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the input arrays.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data to compute the p-value. The t-statistic used is as defined in
+ * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
+ * to the degrees of freedom is used,
+ * as described
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * here.</a> To perform the test under the assumption of equal subpopulation
+ * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double tTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
+ StatUtils.variance(sample1), StatUtils.variance(sample2),
+ sample1.length, sample2.length);
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the input arrays, under the assumption that
+ * the two samples are drawn from subpopulations with equal variances.
+ * To perform the test without the equal variances assumption, use
+ * {@link #tTest(double[], double[])}.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * A pooled variance estimate is used to compute the t-statistic. See
+ * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
+ * minus 2 is used as the degrees of freedom.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double homoscedasticTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ checkSampleData(sample1);
+ checkSampleData(sample2);
+ return homoscedasticTTest(StatUtils.mean(sample1),
+ StatUtils.mean(sample2), StatUtils.variance(sample1),
+ StatUtils.variance(sample2), sample1.length,
+ sample2.length);
+ }
+
+
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
+ * with significance level <code>alpha</code>. This test does not assume
+ * that the subpopulation variances are equal. To perform the test assuming
+ * equal variances, use
+ * {@link #homoscedasticTTest(double[], double[], double)}.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code></p>
+ * <p>
+ * See {@link #t(double[], double[])} for the formula used to compute the
+ * t-statistic. Degrees of freedom are approximated using the
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * Welch-Satterthwaite approximation.</a></p>
+
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use
+ * <br><code>tTest(sample1, sample2, 0.05). </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
+ * the 99% level, first verify that the measured mean of <code>sample 1</code>
+ * is less than the mean of <code>sample 2</code> and then use
+ * <br><code>tTest(sample1, sample2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li>
+ * <li> <code> 0 < alpha <= 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean tTest(double[] sample1, double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ checkSignificanceLevel(alpha);
+ return tTest(sample1, sample2) < alpha;
+ }
+
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
+ * with significance level <code>alpha</code>, assuming that the
+ * subpopulation variances are equal. Use
+ * {@link #tTest(double[], double[], double)} to perform the test without
+ * the assumption of equal variances.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
+ * without the assumption of equal subpopulation variances, use
+ * {@link #tTest(double[], double[], double)}.</p>
+ * <p>
+ * A pooled variance estimate is used to compute the t-statistic. See
+ * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
+ * sizes minus 2 is used as the degrees of freedom.</p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
+ * at the 99% level, first verify that the measured mean of
+ * <code>sample 1</code> is less than the mean of <code>sample 2</code>
+ * and then use
+ * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The observed array lengths must both be at least 2.
+ * </li>
+ * <li> <code> 0 < alpha <= 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean homoscedasticTTest(double[] sample1, double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ checkSignificanceLevel(alpha);
+ return homoscedasticTTest(sample1, sample2) < alpha;
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the datasets described by two StatisticalSummary
+ * instances.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data to compute the p-value. To perform the test assuming
+ * equal variances, use
+ * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException {
+ checkSampleData(sampleStats1);
+ checkSampleData(sampleStats2);
+ return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
+ sampleStats2.getVariance(), sampleStats1.getN(),
+ sampleStats2.getN());
+ }
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <i>p-value</i>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the datasets described by two StatisticalSummary
+ * instances, under the hypothesis of equal subpopulation variances. To
+ * perform a test without the equal variances assumption, use
+ * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.</p>
+ * <p>
+ * See {@link #homoscedasticT(double[], double[])} for the formula used to
+ * compute the t-statistic. The sum of the sample sizes minus 2 is used as
+ * the degrees of freedom.</p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * </p><p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must each contain
+ * at least 2 observations.
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double homoscedasticTTest(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException {
+ checkSampleData(sampleStats1);
+ checkSampleData(sampleStats2);
+ return homoscedasticTTest(sampleStats1.getMean(),
+ sampleStats2.getMean(), sampleStats1.getVariance(),
+ sampleStats2.getVariance(), sampleStats1.getN(),
+ sampleStats2.getN());
+ }
+
+ /**
+ * Performs a
+ * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that
+ * <code>sampleStats1</code> and <code>sampleStats2</code> describe
+ * datasets drawn from populations with the same mean, with significance
+ * level <code>alpha</code>. This test does not assume that the
+ * subpopulation variances are equal. To perform the test under the equal
+ * variances assumption, use
+ * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha * 2</code></p>
+ * <p>
+ * See {@link #t(double[], double[])} for the formula used to compute the
+ * t-statistic. Degrees of freedom are approximated using the
+ * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * Welch-Satterthwaite approximation.</a></p>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use
+ * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
+ * at the 99% level, first verify that the measured mean of
+ * <code>sample 1</code> is less than the mean of <code>sample 2</code>
+ * and then use
+ * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
+ * </li></ol></p>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
+ * here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The datasets described by the two StatisticalSummary instances must each contain
+ * at least 2 observations.
+ * </li>
+ * <li> <code> 0 < alpha <= 0.5 </code>
+ * </li></ul></p>
+ *
+ * @param sampleStats1 StatisticalSummary describing sample data values
+ * @param sampleStats2 StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ public boolean tTest(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2, double alpha)
+ throws IllegalArgumentException, MathException {
+ checkSignificanceLevel(alpha);
+ return tTest(sampleStats1, sampleStats2) < alpha;
+ }
+
+ //----------------------------------------------- Protected methods
+
+ /**
+ * Computes approximate degrees of freedom for 2-sample t-test.
+ *
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return approximate degrees of freedom
+ */
+ protected double df(double v1, double v2, double n1, double n2) {
+ return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
+ ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
+ (n2 * n2 * (n2 - 1d)));
+ }
+
+ /**
+ * Computes t test statistic for 1-sample t-test.
+ *
+ * @param m sample mean
+ * @param mu constant to test against
+ * @param v sample variance
+ * @param n sample n
+ * @return t test statistic
+ */
+ protected double t(double m, double mu, double v, double n) {
+ return (m - mu) / FastMath.sqrt(v / n);
+ }
+
+ /**
+ * Computes t test statistic for 2-sample t-test.
+ * <p>
+ * Does not assume that subpopulation variances are equal.</p>
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return t test statistic
+ */
+ protected double t(double m1, double m2, double v1, double v2, double n1,
+ double n2) {
+ return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
+ }
+
+ /**
+ * Computes t test statistic for 2-sample t-test under the hypothesis
+ * of equal subpopulation variances.
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return t test statistic
+ */
+ protected double homoscedasticT(double m1, double m2, double v1,
+ double v2, double n1, double n2) {
+ double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
+ return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
+ }
+
+ /**
+ * Computes p-value for 2-sided, 1-sample t-test.
+ *
+ * @param m sample mean
+ * @param mu constant to test against
+ * @param v sample variance
+ * @param n sample n
+ * @return p-value
+ * @throws MathException if an error occurs computing the p-value
+ */
+ protected double tTest(double m, double mu, double v, double n)
+ throws MathException {
+ double t = FastMath.abs(t(m, mu, v, n));
+ distribution.setDegreesOfFreedom(n - 1);
+ return 2.0 * distribution.cumulativeProbability(-t);
+ }
+
+ /**
+ * Computes p-value for 2-sided, 2-sample t-test.
+ * <p>
+ * Does not assume subpopulation variances are equal. Degrees of freedom
+ * are estimated from the data.</p>
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return p-value
+ * @throws MathException if an error occurs computing the p-value
+ */
+ protected double tTest(double m1, double m2, double v1, double v2,
+ double n1, double n2)
+ throws MathException {
+ double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
+ double degreesOfFreedom = 0;
+ degreesOfFreedom = df(v1, v2, n1, n2);
+ distribution.setDegreesOfFreedom(degreesOfFreedom);
+ return 2.0 * distribution.cumulativeProbability(-t);
+ }
+
+ /**
+ * Computes p-value for 2-sided, 2-sample t-test, under the assumption
+ * of equal subpopulation variances.
+ * <p>
+ * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return p-value
+ * @throws MathException if an error occurs computing the p-value
+ */
+ protected double homoscedasticTTest(double m1, double m2, double v1,
+ double v2, double n1, double n2)
+ throws MathException {
+ double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
+ double degreesOfFreedom = n1 + n2 - 2;
+ distribution.setDegreesOfFreedom(degreesOfFreedom);
+ return 2.0 * distribution.cumulativeProbability(-t);
+ }
+
+ /**
+ * Modify the distribution used to compute inference statistics.
+ * @param value the new distribution
+ * @since 1.2
+ * @deprecated in 2.2 (to be removed in 3.0).
+ */
+ @Deprecated
+ public void setDistribution(TDistribution value) {
+ distribution = value;
+ }
+
+ /** Check significance level.
+ * @param alpha significance level
+ * @exception IllegalArgumentException if significance level is out of bounds
+ */
+ private void checkSignificanceLevel(final double alpha)
+ throws IllegalArgumentException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+ alpha, 0.0, 0.5);
+ }
+ }
+
+ /** Check sample data.
+ * @param data sample data
+ * @exception IllegalArgumentException if there is not enough sample data
+ */
+ private void checkSampleData(final double[] data)
+ throws IllegalArgumentException {
+ if ((data == null) || (data.length < 2)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
+ (data == null) ? 0 : data.length);
+ }
+ }
+
+ /** Check sample data.
+ * @param stat statistical summary
+ * @exception IllegalArgumentException if there is not enough sample data
+ */
+ private void checkSampleData(final StatisticalSummary stat)
+ throws IllegalArgumentException {
+ if ((stat == null) || (stat.getN() < 2)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
+ (stat == null) ? 0 : stat.getN());
+ }
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java b/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java
new file mode 100644
index 0000000..5023d55
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/TestUtils.java
@@ -0,0 +1,436 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import java.util.Collection;
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.stat.descriptive.StatisticalSummary;
+
+/**
+ * A collection of static methods to create inference test instances or to
+ * perform inference tests.
+ *
+ * <p>
+ * The set methods are not compatible with using the class in multiple threads,
+ * and have therefore been deprecated (along with the getters).
+ * The setters and getters will be removed in version 3.0.
+ *
+ * @since 1.1
+ * @version $Revision: 1067582 $ $Date: 2011-02-06 04:55:32 +0100 (dim. 06 févr. 2011) $
+ */
+public class TestUtils {
+
+ /** Singleton TTest instance using default implementation. */
+ private static TTest tTest = new TTestImpl();
+
+ /** Singleton ChiSquareTest instance using default implementation. */
+ private static ChiSquareTest chiSquareTest =
+ new ChiSquareTestImpl();
+
+ /** Singleton UnknownDistributionChiSquareTest instance using default implementation. */
+ private static UnknownDistributionChiSquareTest unknownDistributionChiSquareTest =
+ new ChiSquareTestImpl();
+
+ /** Singleton OneWayAnova instance using default implementation. */
+ private static OneWayAnova oneWayAnova =
+ new OneWayAnovaImpl();
+
+ /**
+ * Prevent instantiation.
+ */
+ protected TestUtils() {
+ super();
+ }
+
+ /**
+ * Set the (singleton) TTest instance (despite the method name, this overload replaces the TTest, not the ChiSquareTest).
+ *
+ * @param chiSquareTest the new instance to use
+ * @since 1.2
+ * @deprecated 2.2 will be removed in 3.0 - not compatible with use from multiple threads
+ */
+ @Deprecated
+ public static void setChiSquareTest(TTest chiSquareTest) {
+ TestUtils.tTest = chiSquareTest;
+ }
+
+ /**
+ * Return a (singleton) TTest instance. Does not create a new instance.
+ *
+ * @return a TTest instance
+ * @deprecated 2.2 will be removed in 3.0
+ */
+ @Deprecated
+ public static TTest getTTest() {
+ return tTest;
+ }
+
+ /**
+ * Set the (singleton) ChiSquareTest instance.
+ *
+ * @param chiSquareTest the new instance to use
+ * @since 1.2
+ * @deprecated 2.2 will be removed in 3.0 - not compatible with use from multiple threads
+ */
+ @Deprecated
+ public static void setChiSquareTest(ChiSquareTest chiSquareTest) {
+ TestUtils.chiSquareTest = chiSquareTest;
+ }
+
+ /**
+ * Return a (singleton) ChiSquareTest instance. Does not create a new instance.
+ *
+ * @return a ChiSquareTest instance
+ * @deprecated 2.2 will be removed in 3.0
+ */
+ @Deprecated
+ public static ChiSquareTest getChiSquareTest() {
+ return chiSquareTest;
+ }
+
+ /**
+ * Set the (singleton) UnknownDistributionChiSquareTest instance.
+ *
+ * @param unknownDistributionChiSquareTest the new instance to use
+ * @since 1.2
+ * @deprecated 2.2 will be removed in 3.0 - not compatible with use from multiple threads
+ */
+ @Deprecated
+ public static void setUnknownDistributionChiSquareTest(UnknownDistributionChiSquareTest unknownDistributionChiSquareTest) {
+ TestUtils.unknownDistributionChiSquareTest = unknownDistributionChiSquareTest;
+ }
+
+ /**
+ * Return a (singleton) UnknownDistributionChiSquareTest instance. Does not create a new instance.
+ *
+ * @return a UnknownDistributionChiSquareTest instance
+ * @deprecated 2.2 will be removed in 3.0
+ */
+ @Deprecated
+ public static UnknownDistributionChiSquareTest getUnknownDistributionChiSquareTest() {
+ return unknownDistributionChiSquareTest;
+ }
+
+ /**
+ * Set the (singleton) OneWayAnova instance
+ *
+ * @param oneWayAnova the new instance to use
+ * @since 1.2
+ * @deprecated 2.2 will be removed in 3.0 - not compatible with use from multiple threads
+ */
+ @Deprecated
+ public static void setOneWayAnova(OneWayAnova oneWayAnova) {
+ TestUtils.oneWayAnova = oneWayAnova;
+ }
+
+ /**
+ * Return a (singleton) OneWayAnova instance. Does not create a new instance.
+ *
+ * @return a OneWayAnova instance
+ * @since 1.2
+ * @deprecated 2.2 will be removed in 3.0
+ */
+ @Deprecated
+ public static OneWayAnova getOneWayAnova() {
+ return oneWayAnova;
+ }
+
+
+ // CHECKSTYLE: stop JavadocMethodCheck
+
+ /**
+ * @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(double[], double[])
+ */
+ public static double homoscedasticT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException {
+ return tTest.homoscedasticT(sample1, sample2);
+ }
+
+ /**
+ * @see org.apache.commons.math.stat.inference.TTest#homoscedasticT(org.apache.commons.math.stat.descriptive.StatisticalSummary, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double homoscedasticT(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException {
+ return tTest.homoscedasticT(sampleStats1, sampleStats2);
+ }
+
+ /**
+ * @see org.apache.commons.math.stat.inference.TTest#homoscedasticTTest(double[], double[], double)
+ */
+ public static boolean homoscedasticTTest(double[] sample1, double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest. homoscedasticTTest(sample1, sample2, alpha);
+ }
+
+ /**
+ * @see org.apache.commons.math.stat.inference.TTest#homoscedasticTTest(double[], double[])
+ */
+ public static double homoscedasticTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ return tTest.homoscedasticTTest(sample1, sample2);
+ }
+
+ /**
+ * Forwards to {@code tTest.homoscedasticTTest(sampleStats1, sampleStats2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#homoscedasticTTest(org.apache.commons.math.stat.descriptive.StatisticalSummary, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double homoscedasticTTest(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException {
+ return tTest.homoscedasticTTest(sampleStats1, sampleStats2);
+ }
+
+ /**
+ * Forwards to {@code tTest.pairedT(sample1, sample2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#pairedT(double[], double[])
+ */
+ public static double pairedT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ return tTest.pairedT(sample1, sample2);
+ }
+
+ /**
+ * Forwards to {@code tTest.pairedTTest(sample1, sample2, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[], double)
+ */
+ public static boolean pairedTTest(double[] sample1, double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest.pairedTTest(sample1, sample2, alpha);
+ }
+
+ /**
+ * Forwards to {@code tTest.pairedTTest(sample1, sample2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[], double[])
+ */
+ public static double pairedTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ return tTest.pairedTTest(sample1, sample2);
+ }
+
+ /**
+ * Forwards to {@code tTest.t(mu, observed)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#t(double, double[])
+ */
+ public static double t(double mu, double[] observed)
+ throws IllegalArgumentException {
+ return tTest.t(mu, observed);
+ }
+
+ /**
+ * Forwards to {@code tTest.t(mu, sampleStats)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#t(double, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double t(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException {
+ return tTest.t(mu, sampleStats);
+ }
+
+ /**
+ * Forwards to {@code tTest.t(sample1, sample2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#t(double[], double[])
+ */
+ public static double t(double[] sample1, double[] sample2)
+ throws IllegalArgumentException {
+ return tTest.t(sample1, sample2);
+ }
+
+ /**
+ * Forwards to {@code tTest.t(sampleStats1, sampleStats2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#t(org.apache.commons.math.stat.descriptive.StatisticalSummary, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double t(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException {
+ return tTest.t(sampleStats1, sampleStats2);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(mu, sample, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double, double[], double)
+ */
+ public static boolean tTest(double mu, double[] sample, double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(mu, sample, alpha);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(mu, sample)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double, double[])
+ */
+ public static double tTest(double mu, double[] sample)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(mu, sample);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(mu, sampleStats, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double, org.apache.commons.math.stat.descriptive.StatisticalSummary, double)
+ */
+ public static boolean tTest(double mu, StatisticalSummary sampleStats,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(mu, sampleStats, alpha);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(mu, sampleStats)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double tTest(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(mu, sampleStats);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(sample1, sample2, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double[], double[], double)
+ */
+ public static boolean tTest(double[] sample1, double[] sample2, double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(sample1, sample2, alpha);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(sample1, sample2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(double[], double[])
+ */
+ public static double tTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(sample1, sample2);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(sampleStats1, sampleStats2, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(org.apache.commons.math.stat.descriptive.StatisticalSummary, org.apache.commons.math.stat.descriptive.StatisticalSummary, double)
+ */
+ public static boolean tTest(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2, double alpha)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(sampleStats1, sampleStats2, alpha);
+ }
+
+ /**
+ * Forwards to {@code tTest.tTest(sampleStats1, sampleStats2)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.TTest#tTest(org.apache.commons.math.stat.descriptive.StatisticalSummary, org.apache.commons.math.stat.descriptive.StatisticalSummary)
+ */
+ public static double tTest(StatisticalSummary sampleStats1,
+ StatisticalSummary sampleStats2)
+ throws IllegalArgumentException, MathException {
+ return tTest.tTest(sampleStats1, sampleStats2);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquare(expected, observed)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquare(double[], long[])
+ */
+ public static double chiSquare(double[] expected, long[] observed)
+ throws IllegalArgumentException {
+ return chiSquareTest.chiSquare(expected, observed);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquare(counts)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquare(long[][])
+ */
+ public static double chiSquare(long[][] counts)
+ throws IllegalArgumentException {
+ return chiSquareTest.chiSquare(counts);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquareTest(expected, observed, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTest(double[], long[], double)
+ */
+ public static boolean chiSquareTest(double[] expected, long[] observed,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ return chiSquareTest.chiSquareTest(expected, observed, alpha);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquareTest(expected, observed)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTest(double[], long[])
+ */
+ public static double chiSquareTest(double[] expected, long[] observed)
+ throws IllegalArgumentException, MathException {
+ return chiSquareTest.chiSquareTest(expected, observed);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquareTest(counts, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTest(long[][], double)
+ */
+ public static boolean chiSquareTest(long[][] counts, double alpha)
+ throws IllegalArgumentException, MathException {
+ return chiSquareTest.chiSquareTest(counts, alpha);
+ }
+
+ /**
+ * Forwards to {@code chiSquareTest.chiSquareTest(counts)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.ChiSquareTest#chiSquareTest(long[][])
+ */
+ public static double chiSquareTest(long[][] counts)
+ throws IllegalArgumentException, MathException {
+ return chiSquareTest.chiSquareTest(counts);
+ }
+
+ /**
+ * Forwards to
+ * {@code unknownDistributionChiSquareTest.chiSquareDataSetsComparison(observed1, observed2)}
+ * and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareDataSetsComparison(long[], long[])
+ *
+ * @since 1.2
+ */
+ public static double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException {
+ return unknownDistributionChiSquareTest.chiSquareDataSetsComparison(observed1, observed2);
+ }
+
+ /**
+ * Forwards to
+ * {@code unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2)}
+ * and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[])
+ *
+ * @since 1.2
+ */
+ public static double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException, MathException {
+ return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2);
+ }
+
+
+ /**
+ * Forwards to
+ * {@code unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha)}
+ * and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.UnknownDistributionChiSquareTest#chiSquareTestDataSetsComparison(long[], long[], double)
+ *
+ * @since 1.2
+ */
+ public static boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ return unknownDistributionChiSquareTest.chiSquareTestDataSetsComparison(observed1, observed2, alpha);
+ }
+
+ /**
+ * Forwards to {@code oneWayAnova.anovaFValue(categoryData)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaFValue(Collection)
+ *
+ * @since 1.2
+ */
+ public static double oneWayAnovaFValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ return oneWayAnova.anovaFValue(categoryData);
+ }
+
+ /**
+ * Forwards to {@code oneWayAnova.anovaPValue(categoryData)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaPValue(Collection)
+ *
+ * @since 1.2
+ */
+ public static double oneWayAnovaPValue(Collection<double[]> categoryData)
+ throws IllegalArgumentException, MathException {
+ return oneWayAnova.anovaPValue(categoryData);
+ }
+
+ /**
+ * Forwards to {@code oneWayAnova.anovaTest(categoryData, alpha)} and returns its result.
+ *
+ * @see org.apache.commons.math.stat.inference.OneWayAnova#anovaTest(Collection,double)
+ *
+ * @since 1.2
+ */
+ public static boolean oneWayAnovaTest(Collection<double[]> categoryData, double alpha)
+ throws IllegalArgumentException, MathException {
+ return oneWayAnova.anovaTest(categoryData, alpha);
+ }
+
+ // CHECKSTYLE: resume JavadocMethodCheck
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java b/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java
new file mode 100644
index 0000000..662e4d6
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/UnknownDistributionChiSquareTest.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+
+/**
+ * An interface for Chi-Square tests for unknown distributions.
+ * <p>Two-sample tests are used when the distribution is unknown <i>a priori</i>
+ * but provided by one sample. We compare the second sample against the first.</p>
+ *
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ * @since 1.2
+ */
+public interface UnknownDistributionChiSquareTest extends ChiSquareTest {
+
+ /**
+ * <p>Computes a
+ * <a href="http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm">
+ * Chi-Square two sample test statistic</a> comparing bin frequency counts
+ * in <code>observed1</code> and <code>observed2</code>. The
+ * sums of frequency counts in the two samples are not required to be the
+ * same. The formula used to compute the test statistic is</p>
+ * <p><code>
+ * &sum;[(K * observed1[i] - observed2[i]/K)<sup>2</sup> / (observed1[i] + observed2[i])]
+ * </code> where
+ * <br/><code>K = &radic;[&sum;(observed2) / &sum;(observed1)]</code>
+ * </p>
+ * <p>This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
+ * both observed counts follow the same distribution.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Observed counts must be non-negative.
+ * </li>
+ * <li>Observed counts for a specific bin must not both be zero.
+ * </li>
+ * <li>Observed counts for a specific sample must not all be 0.
+ * </li>
+ * <li>The arrays <code>observed1</code> and <code>observed2</code> must have the same length and
+ * their common length must be at least 2.
+ * </li></ul></p><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @return chiSquare statistic
+ * @throws IllegalArgumentException if preconditions are not met
+ */
+ double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException;
+
+ /**
+ * <p>Returns the <i>observed significance level</i>, or <a href=
+ * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a Chi-Square two sample test comparing
+ * bin frequency counts in <code>observed1</code> and
+ * <code>observed2</code>.
+ * </p>
+ * <p>The number returned is the smallest significance level at which one
+ * can reject the null hypothesis that the observed counts conform to the
+ * same distribution.
+ * </p>
+ * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for details
+ * on the formula used to compute the test statistic. The number of degrees
+ * of freedom used to perform the test is one less than the common length
+ * of the input observed count arrays.
+ * </p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Observed counts must be non-negative.
+ * </li>
+ * <li>Observed counts for a specific bin must not both be zero.
+ * </li>
+ * <li>Observed counts for a specific sample must not all be 0.
+ * </li>
+ * <li>The arrays <code>observed1</code> and <code>observed2</code> must
+ * have the same length and
+ * their common length must be at least 2.
+ * </li></ul><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @return p-value
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * <p>Performs a Chi-Square two sample test comparing two binned data
+ * sets. The test evaluates the null hypothesis that the two lists of
+ * observed counts conform to the same frequency distribution, with
+ * significance level <code>alpha</code>. Returns true iff the null
+ * hypothesis can be rejected with 100 * (1 - alpha) percent confidence.
+ * </p>
+ * <p>See {@link #chiSquareDataSetsComparison(long[], long[])} for
+ * details on the formula used to compute the Chi-Square statistic used
+ * in the test. The number of degrees of freedom used to perform the test is
+ * one less than the common length of the input observed count arrays.
+ * </p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>Observed counts must be non-negative.
+ * </li>
+ * <li>Observed counts for a specific bin must not both be zero.
+ * </li>
+ * <li>Observed counts for a specific sample must not all be 0.
+ * </li>
+ * <li>The arrays <code>observed1</code> and <code>observed2</code> must
+ * have the same length and their common length must be at least 2.
+ * </li>
+ * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
+ * </li></ul><p>
+ * If any of the preconditions are not met, an
+ * <code>IllegalArgumentException</code> is thrown.</p>
+ *
+ * @param observed1 array of observed frequency counts of the first data set
+ * @param observed2 array of observed frequency counts of the second data set
+ * @param alpha significance level of the test
+ * @return true iff null hypothesis can be rejected with confidence
+ * 1 - alpha
+ * @throws IllegalArgumentException if preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2, double alpha)
+ throws IllegalArgumentException, MathException;
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/inference/package.html b/src/main/java/org/apache/commons/math/stat/inference/package.html
new file mode 100644
index 0000000..288eebf
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/inference/package.html
@@ -0,0 +1,23 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>
+ Classes providing hypothesis testing and confidence interval
+ construction.
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/package.html b/src/main/java/org/apache/commons/math/stat/package.html
new file mode 100644
index 0000000..d62d67a
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/package.html
@@ -0,0 +1,20 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>Data storage, manipulation and summary routines.</body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java b/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java
new file mode 100644
index 0000000..cffa7d1
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/ranking/NaNStrategy.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.ranking;
+
+/**
+ * Strategies for handling NaN values in rank transformations.
+ * <ul>
+ * <li>MINIMAL - NaNs are treated as minimal in the ordering, equivalent to
+ * (that is, tied with) <code>Double.NEGATIVE_INFINITY</code>.</li>
+ * <li>MAXIMAL - NaNs are treated as maximal in the ordering, equivalent to
+ * (that is, tied with) <code>Double.POSITIVE_INFINITY</code>.</li>
+ * <li>REMOVED - NaNs are removed before the rank transform is applied.</li>
+ * <li>FIXED - NaNs are left "in place," that is the rank transformation is
+ * applied to the other elements in the input array, but the NaN elements
+ * are returned unchanged.</li>
+ * </ul>
+ *
+ * @since 2.0
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public enum NaNStrategy {
+
+ /** NaNs are considered minimal in the ordering (tied with negative infinity). */
+ MINIMAL,
+
+ /** NaNs are considered maximal in the ordering (tied with positive infinity). */
+ MAXIMAL,
+
+ /** NaNs are removed before computing ranks. */
+ REMOVED,
+
+ /** NaNs are left in place; only the other elements are ranked. */
+ FIXED
+}
diff --git a/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java b/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java
new file mode 100644
index 0000000..f51189c
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/ranking/NaturalRanking.java
@@ -0,0 +1,464 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.ranking;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.math.exception.MathInternalError;
+import org.apache.commons.math.random.RandomData;
+import org.apache.commons.math.random.RandomDataImpl;
+import org.apache.commons.math.random.RandomGenerator;
+import org.apache.commons.math.util.FastMath;
+
+
+/**
+ * <p> Ranking based on the natural ordering on doubles.</p>
+ * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
+ * are handled using the selected {@link TiesStrategy}.
+ * Configuration settings are supplied in optional constructor arguments.
+ * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
+ * respectively. When using {@link TiesStrategy#RANDOM}, a
+ * {@link RandomGenerator} may be supplied as a constructor argument.</p>
+ * <p>Examples:
+ * <table border="1" cellpadding="3">
+ * <tr><th colspan="3">
+ * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
+ * </th></tr>
+ * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
+ * <th><code>rank(data)</code></th></tr>
+ * <tr>
+ * <td>default (NaNs maximal)</td>
+ * <td>default (ties averaged)</td>
+ * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
+ * <tr>
+ * <td>default (NaNs maximal)</td>
+ * <td>MINIMUM</td>
+ * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
+ * <tr>
+ * <td>MINIMAL</td>
+ * <td>default (ties averaged)</td>
+ * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
+ * <tr>
+ * <td>REMOVED</td>
+ * <td>SEQUENTIAL</td>
+ * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
+ * <tr>
+ * <td>MINIMAL</td>
+ * <td>MAXIMUM</td>
+ * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
+ * <p>An input with nothing to rank (an empty array, or an array holding only
+ * NaNs under {@link NaNStrategy#REMOVED}) produces an empty array of ranks.</p>
+ *
+ * @since 2.0
+ * @version $Revision: 1061496 $ $Date: 2011-01-20 21:32:16 +0100 (jeu. 20 janv. 2011) $
+ */
+public class NaturalRanking implements RankingAlgorithm {
+
+    /** default NaN strategy */
+    public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
+
+    /** default ties strategy */
+    public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
+
+    /** NaN strategy - defaults to NaNs maximal */
+    private final NaNStrategy nanStrategy;
+
+    /** Ties strategy - defaults to ties averaged */
+    private final TiesStrategy tiesStrategy;
+
+    /** Source of random data - used only when ties strategy is RANDOM */
+    private final RandomData randomData;
+
+    /**
+     * Create a NaturalRanking with default strategies for handling ties and NaNs.
+     */
+    public NaturalRanking() {
+        super();
+        tiesStrategy = DEFAULT_TIES_STRATEGY;
+        nanStrategy = DEFAULT_NAN_STRATEGY;
+        randomData = null;
+    }
+
+    /**
+     * Create a NaturalRanking with the given TiesStrategy.
+     *
+     * @param tiesStrategy the TiesStrategy to use
+     */
+    public NaturalRanking(TiesStrategy tiesStrategy) {
+        super();
+        this.tiesStrategy = tiesStrategy;
+        nanStrategy = DEFAULT_NAN_STRATEGY;
+        randomData = new RandomDataImpl();
+    }
+
+    /**
+     * Create a NaturalRanking with the given NaNStrategy.
+     *
+     * @param nanStrategy the NaNStrategy to use
+     */
+    public NaturalRanking(NaNStrategy nanStrategy) {
+        super();
+        this.nanStrategy = nanStrategy;
+        tiesStrategy = DEFAULT_TIES_STRATEGY;
+        randomData = null;
+    }
+
+    /**
+     * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
+     *
+     * @param nanStrategy NaNStrategy to use
+     * @param tiesStrategy TiesStrategy to use
+     */
+    public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
+        super();
+        this.nanStrategy = nanStrategy;
+        this.tiesStrategy = tiesStrategy;
+        randomData = new RandomDataImpl();
+    }
+
+    /**
+     * Create a NaturalRanking with TiesStrategy.RANDOM and the given
+     * RandomGenerator as the source of random data.
+     *
+     * @param randomGenerator source of random data
+     */
+    public NaturalRanking(RandomGenerator randomGenerator) {
+        super();
+        this.tiesStrategy = TiesStrategy.RANDOM;
+        nanStrategy = DEFAULT_NAN_STRATEGY;
+        randomData = new RandomDataImpl(randomGenerator);
+    }
+
+
+    /**
+     * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
+     * and the given source of random data.
+     *
+     * @param nanStrategy NaNStrategy to use
+     * @param randomGenerator source of random data
+     */
+    public NaturalRanking(NaNStrategy nanStrategy,
+            RandomGenerator randomGenerator) {
+        super();
+        this.nanStrategy = nanStrategy;
+        this.tiesStrategy = TiesStrategy.RANDOM;
+        randomData = new RandomDataImpl(randomGenerator);
+    }
+
+    /**
+     * Return the NaNStrategy
+     *
+     * @return returns the NaNStrategy
+     */
+    public NaNStrategy getNanStrategy() {
+        return nanStrategy;
+    }
+
+    /**
+     * Return the TiesStrategy
+     *
+     * @return the TiesStrategy
+     */
+    public TiesStrategy getTiesStrategy() {
+        return tiesStrategy;
+    }
+
+    /**
+     * Rank <code>data</code> using the natural ordering on Doubles, with
+     * NaN values handled according to <code>nanStrategy</code> and ties
+     * resolved using <code>tiesStrategy.</code>
+     * <p>If nothing remains to be ranked (the input is empty, or every
+     * element was a NaN dropped by the REMOVED strategy) an empty array is
+     * returned.</p>
+     *
+     * @param data array to be ranked
+     * @return array of ranks; its length equals <code>data.length</code>,
+     * except under the REMOVED strategy where it equals the number of
+     * non-NaN elements of <code>data</code>
+     */
+    public double[] rank(double[] data) {
+
+        // Array recording initial positions of data to be ranked
+        IntDoublePair[] ranks = new IntDoublePair[data.length];
+        for (int i = 0; i < data.length; i++) {
+            ranks[i] = new IntDoublePair(data[i], i);
+        }
+
+        // Recode, remove or record positions of NaNs
+        List<Integer> nanPositions = null;
+        switch (nanStrategy) {
+            case MAXIMAL: // Replace NaNs with +INFs
+                recodeNaNs(ranks, Double.POSITIVE_INFINITY);
+                break;
+            case MINIMAL: // Replace NaNs with -INFs
+                recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
+                break;
+            case REMOVED: // Drop NaNs from data
+                ranks = removeNaNs(ranks);
+                break;
+            case FIXED:   // Record positions of NaNs
+                nanPositions = getNanPositions(ranks);
+                break;
+            default: // this should not happen unless NaNStrategy enum is changed
+                throw new MathInternalError();
+        }
+
+        // Nothing to rank: the walk below seeds itself from ranks[0], so an
+        // empty array must be handled here rather than failing with an
+        // ArrayIndexOutOfBoundsException.
+        if (ranks.length == 0) {
+            return new double[0];
+        }
+
+        // Sort the IntDoublePairs
+        Arrays.sort(ranks);
+
+        // Walk the sorted array, filling output array using sorted positions,
+        // resolving ties as we go
+        double[] out = new double[ranks.length];
+        int pos = 1;  // position in sorted array
+        out[ranks[0].getPosition()] = pos;
+        List<Integer> tiesTrace = new ArrayList<Integer>();
+        tiesTrace.add(ranks[0].getPosition());
+        for (int i = 1; i < ranks.length; i++) {
+            if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
+                // tie sequence has ended (or had length 1)
+                pos = i + 1;
+                if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
+                    resolveTie(out, tiesTrace);
+                }
+                tiesTrace = new ArrayList<Integer>();
+                tiesTrace.add(ranks[i].getPosition());
+            } else {
+                // tie sequence continues
+                tiesTrace.add(ranks[i].getPosition());
+            }
+            out[ranks[i].getPosition()] = pos;
+        }
+        if (tiesTrace.size() > 1) {  // handle tie sequence at end
+            resolveTie(out, tiesTrace);
+        }
+        if (nanStrategy == NaNStrategy.FIXED) {
+            restoreNaNs(out, nanPositions);
+        }
+        return out;
+    }
+
+    /**
+     * Returns an array that is a copy of the input array with IntDoublePairs
+     * having NaN values removed. The position stored in each surviving pair
+     * is reduced by the number of NaN entries that preceded it, so that the
+     * positions index into the (shorter) output of the rank transform.
+     * If the input contains no NaNs it is returned as is.
+     *
+     * @param ranks input array
+     * @return array with NaN-valued entries removed
+     */
+    private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
+        if (!containsNaNs(ranks)) {
+            return ranks;
+        }
+        IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
+        int kept = 0;     // number of non-NaN pairs copied so far
+        int dropped = 0;  // number of NaN pairs encountered so far
+        for (int i = 0; i < ranks.length; i++) {
+            if (Double.isNaN(ranks[i].getValue())) {
+                // drop; later elements shift left by one more slot
+                dropped++;
+            } else {
+                outRanks[kept] = new IntDoublePair(
+                        ranks[i].getValue(), ranks[i].getPosition() - dropped);
+                kept++;
+            }
+        }
+        IntDoublePair[] returnRanks = new IntDoublePair[kept];
+        System.arraycopy(outRanks, 0, returnRanks, 0, kept);
+        return returnRanks;
+    }
+
+    /**
+     * Recodes NaN values to the given value.
+     *
+     * @param ranks array to recode
+     * @param value the value to replace NaNs with
+     */
+    private void recodeNaNs(IntDoublePair[] ranks, double value) {
+        for (int i = 0; i < ranks.length; i++) {
+            if (Double.isNaN(ranks[i].getValue())) {
+                ranks[i] = new IntDoublePair(
+                        value, ranks[i].getPosition());
+            }
+        }
+    }
+
+    /**
+     * Checks for presence of NaNs in <code>ranks.</code>
+     *
+     * @param ranks array to be searched for NaNs
+     * @return true iff ranks contains one or more NaNs
+     */
+    private boolean containsNaNs(IntDoublePair[] ranks) {
+        for (int i = 0; i < ranks.length; i++) {
+            if (Double.isNaN(ranks[i].getValue())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
+     * The input <code>ranks</code> array is expected to take the same value
+     * for all indices in <code>tiesTrace</code>.  The common value is recoded
+     * according to the tiesStrategy. For example, if ranks = (5,8,2,6,2,7,1,2),
+     * tiesTrace = (2,4,7) and tiesStrategy is MINIMUM, ranks will be unchanged.
+     * The same array and trace with tiesStrategy AVERAGE will come out
+     * (5,8,3,6,3,7,1,3).
+     *
+     * @param ranks array of ranks
+     * @param tiesTrace list of indices where <code>ranks</code> is constant
+     * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
+     * </code>
+     */
+    private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
+
+        // constant value of ranks over tiesTrace
+        final double c = ranks[tiesTrace.get(0)];
+
+        // length of sequence of tied ranks
+        final int length = tiesTrace.size();
+
+        switch (tiesStrategy) {
+            case  AVERAGE:  // Replace ranks with average
+                fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
+                break;
+            case MAXIMUM:   // Replace ranks with maximum values
+                fill(ranks, tiesTrace, c + length - 1);
+                break;
+            case MINIMUM:   // Replace ties with minimum
+                fill(ranks, tiesTrace, c);
+                break;
+            case RANDOM:    // Fill with random integral values in [c, c + length - 1]
+                Iterator<Integer> iterator = tiesTrace.iterator();
+                long f = FastMath.round(c);
+                while (iterator.hasNext()) {
+                    ranks[iterator.next()] =
+                        randomData.nextLong(f, f + length - 1);
+                }
+                break;
+            case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
+                // walk and fill
+                iterator = tiesTrace.iterator();
+                f = FastMath.round(c);
+                int i = 0;
+                while (iterator.hasNext()) {
+                    ranks[iterator.next()] = f + i++;
+                }
+                break;
+            default: // this should not happen unless TiesStrategy enum is changed
+                throw new MathInternalError();
+        }
+    }
+
+    /**
+     * Sets <code>data[i] = value</code> for each i in <code>tiesTrace.</code>
+     *
+     * @param data array to modify
+     * @param tiesTrace list of index values to set
+     * @param value value to set
+     */
+    private void fill(double[] data, List<Integer> tiesTrace, double value) {
+        Iterator<Integer> iterator = tiesTrace.iterator();
+        while (iterator.hasNext()) {
+            data[iterator.next()] = value;
+        }
+    }
+
+    /**
+     * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
+     *
+     * @param ranks array to modify
+     * @param nanPositions list of index values to set to <code>Double.NaN</code>
+     */
+    private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
+        if (nanPositions.size() == 0) {
+            return;
+        }
+        Iterator<Integer> iterator = nanPositions.iterator();
+        while (iterator.hasNext()) {
+            ranks[iterator.next().intValue()] = Double.NaN;
+        }
+
+    }
+
+    /**
+     * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
+     *
+     * @param ranks array to search for <code>NaNs</code>
+     * @return list of indexes i such that <code>ranks[i] = NaN</code>
+     */
+    private List<Integer> getNanPositions(IntDoublePair[] ranks) {
+        ArrayList<Integer> out = new ArrayList<Integer>();
+        for (int i = 0; i < ranks.length; i++) {
+            if (Double.isNaN(ranks[i].getValue())) {
+                out.add(Integer.valueOf(i));
+            }
+        }
+        return out;
+    }
+
+    /**
+     * Represents the position of a double value in an ordering.
+     * Comparable interface is implemented so Arrays.sort can be used
+     * to sort an array of IntDoublePairs by value.  Note that the
+     * implicitly defined natural ordering is NOT consistent with equals.
+     */
+    private static class IntDoublePair implements Comparable<IntDoublePair>  {
+
+        /** Value of the pair */
+        private final double value;
+
+        /** Original position of the pair */
+        private final int position;
+
+        /**
+         * Construct an IntDoublePair with the given value and position.
+         * @param value the value of the pair
+         * @param position the original position
+         */
+        public IntDoublePair(double value, int position) {
+            this.value = value;
+            this.position = position;
+        }
+
+        /**
+         * Compare this IntDoublePair to another pair.
+         * Only the <strong>values</strong> are compared.
+         *
+         * @param other the other pair to compare this to
+         * @return result of <code>Double.compare(value, other.value)</code>
+         */
+        public int compareTo(IntDoublePair other) {
+            return Double.compare(value, other.value);
+        }
+
+        /**
+         * Returns the value of the pair.
+         * @return value
+         */
+        public double getValue() {
+            return value;
+        }
+
+        /**
+         * Returns the original position of the pair.
+         * @return position
+         */
+        public int getPosition() {
+            return position;
+        }
+    }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/ranking/RankingAlgorithm.java b/src/main/java/org/apache/commons/math/stat/ranking/RankingAlgorithm.java
new file mode 100644
index 0000000..b01f324
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/ranking/RankingAlgorithm.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.ranking;
+
+/**
+ * Interface representing a rank transformation.
+ *
+ * @since 2.0
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ */
+public interface RankingAlgorithm {
+ /**
+ * <p>Performs a rank transformation on the input data, returning an array
+ * of ranks.</p>
+ *
+ * <p>Ranks should be 1-based - that is, the smallest value
+ * returned in an array of ranks should be greater than or equal to one,
+ * rather than 0. Ranks should in general take integer values, though
+ * implementations may return averages or other floating point values
+ * to resolve ties in the input data.</p>
+ *
+ * @param data array of data to be ranked
+ * @return an array of ranks corresponding to the elements of the input array
+ * (floating point, since tie resolution may yield non-integer ranks)
+ */
+ double[] rank (double[] data);
+}
diff --git a/src/main/java/org/apache/commons/math/stat/ranking/TiesStrategy.java b/src/main/java/org/apache/commons/math/stat/ranking/TiesStrategy.java
new file mode 100644
index 0000000..794c229
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/ranking/TiesStrategy.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.ranking;
+
+/**
+ * Strategies for handling tied values in rank transformations.
+ * <ul>
+ * <li>SEQUENTIAL - Ties are assigned ranks in order of occurrence in the original array,
+ * for example (1,3,4,3) is ranked as (1,2,4,3)</li>
+ * <li>MINIMUM - Tied values are assigned the minimum applicable rank, or the rank
+ * of the first occurrence. For example, (1,3,4,3) is ranked as (1,2,4,2)</li>
+ * <li>MAXIMUM - Tied values are assigned the maximum applicable rank, or the rank
+ * of the last occurrence. For example, (1,3,4,3) is ranked as (1,3,4,3)</li>
+ * <li>AVERAGE - Tied values are assigned the average of the applicable ranks.
+ * For example, (1,3,4,3) is ranked as (1,2.5,4,2.5)</li>
+ * <li>RANDOM - Tied values are assigned a random integer rank from among the
+ * applicable values. The assigned rank will always be an integer, (inclusively)
+ * between the values returned by the MINIMUM and MAXIMUM strategies.</li>
+ * </ul>
+ *
+ * @since 2.0
+ * @version $Revision: 981332 $ $Date: 2010-08-02 00:24:31 +0200 (lun. 02 août 2010) $
+ */
+public enum TiesStrategy {
+
+ /**
+ * Ties assigned sequential ranks in order of occurrence;
+ * for example (1,3,4,3) is ranked as (1,2,4,3).
+ */
+ SEQUENTIAL,
+
+ /**
+ * Ties get the minimum applicable rank (the rank of the first occurrence);
+ * for example (1,3,4,3) is ranked as (1,2,4,2).
+ */
+ MINIMUM,
+
+ /**
+ * Ties get the maximum applicable rank (the rank of the last occurrence);
+ * for example (1,3,4,3) is ranked as (1,3,4,3).
+ */
+ MAXIMUM,
+
+ /**
+ * Ties get the average of applicable ranks;
+ * for example (1,3,4,3) is ranked as (1,2.5,4,2.5).
+ */
+ AVERAGE,
+
+ /**
+ * Ties get a random integral value from among applicable ranks, always
+ * between (inclusively) the ranks the MINIMUM and MAXIMUM strategies give.
+ */
+ RANDOM
+}
diff --git a/src/main/java/org/apache/commons/math/stat/ranking/package.html b/src/main/java/org/apache/commons/math/stat/ranking/package.html
new file mode 100644
index 0000000..63e0c4a
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/ranking/package.html
@@ -0,0 +1,22 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision:$ $Date:$ -->
+ <body>
+ Classes providing rank transformations.
+ </body>
+</html>
diff --git a/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java b/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
new file mode 100644
index 0000000..9757682
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/AbstractMultipleLinearRegression.java
@@ -0,0 +1,366 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.linear.RealVector;
+import org.apache.commons.math.linear.ArrayRealVector;
+import org.apache.commons.math.stat.descriptive.moment.Variance;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Abstract base class for implementations of MultipleLinearRegression.
+ * @version $Revision: 1073459 $ $Date: 2011-02-22 20:18:12 +0100 (mar. 22 févr. 2011) $
+ * @since 2.0
+ */
+public abstract class AbstractMultipleLinearRegression implements
+ MultipleLinearRegression {
+
+ /** X sample data. */
+ protected RealMatrix X;
+
+ /** Y sample data. */
+ protected RealVector Y;
+
+ /** Whether or not the regression model includes an intercept. True means no intercept. */
+ private boolean noIntercept = false;
+
+ /**
+ * @return true if the model has no intercept term; false otherwise
+ * @since 2.2
+ */
+ public boolean isNoIntercept() {
+ return noIntercept;
+ }
+
+ /**
+ * @param noIntercept true means the model is to be estimated without an intercept term
+ * @since 2.2
+ */
+ public void setNoIntercept(boolean noIntercept) {
+ this.noIntercept = noIntercept;
+ }
+
+ /**
+ * <p>Loads model x and y sample data from a flat input array, overriding any previous sample.
+ * </p>
+ * <p>Assumes that rows are concatenated with y values first in each row. For example, an input
+ * <code>data</code> array containing the sequence of values (1, 2, 3, 4, 5, 6, 7, 8, 9) with
+ * <code>nobs = 3</code> and <code>nvars = 2</code> creates a regression dataset with two
+ * independent variables, as below:
+ * <pre>
+ * y x[0] x[1]
+ * --------------
+ * 1 2 3
+ * 4 5 6
+ * 7 8 9
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term. If {@link #isNoIntercept()} is <code>true</code>,
+ * the X matrix will be created without an initial column of "1"s; otherwise this column will
+ * be added.
+ * </p>
+ * <p>Throws IllegalArgumentException if any of the following preconditions fail:
+ * <ul><li><code>data</code> cannot be null</li>
+ * <li><code>data.length = nobs * (nvars + 1)</code></li>
+ * <li><code>nobs > nvars</code></li></ul>
+ * </p>
+ *
+ * @param data input data array
+ * @param nobs number of observations (rows)
+ * @param nvars number of independent variables (columns, not counting y)
+ * @throws IllegalArgumentException if the preconditions are not met
+ */
+ public void newSampleData(double[] data, int nobs, int nvars) {
+ if (data == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (data.length != nobs * (nvars + 1)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.INVALID_REGRESSION_ARRAY, data.length, nobs, nvars);
+ }
+ if (nobs <= nvars) {
+ // Pass the observation and predictor counts, mirroring the arguments
+ // validateSampleData supplies for this same message format.
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, nobs, nvars);
+ }
+ double[] y = new double[nobs];
+ // One extra leading column holds the constant 1 when an intercept is estimated
+ final int cols = noIntercept ? nvars: nvars + 1;
+ double[][] x = new double[nobs][cols];
+ // Read position in the flat input; each row is y followed by its x values
+ int pointer = 0;
+ for (int i = 0; i < nobs; i++) {
+ y[i] = data[pointer++];
+ if (!noIntercept) {
+ x[i][0] = 1.0d;
+ }
+ for (int j = noIntercept ? 0 : 1; j < cols; j++) {
+ x[i][j] = data[pointer++];
+ }
+ }
+ this.X = new Array2DRowRealMatrix(x);
+ this.Y = new ArrayRealVector(y);
+ }
+
+ /**
+ * Loads new y sample data, overriding any previous data.
+ *
+ * @param y the array representing the y sample
+ * @throws IllegalArgumentException if y is null or empty
+ */
+ protected void newYSampleData(double[] y) {
+ if (y == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (y.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ this.Y = new ArrayRealVector(y);
+ }
+
+ /**
+ * <p>Loads new x sample data, overriding any previous data.
+ * </p>
+ * The input <code>x</code> array should have one row for each sample
+ * observation, with columns corresponding to independent variables.
+ * For example, if <pre>
+ * <code> x = new double[][] {{1, 2}, {3, 4}, {5, 6}} </code></pre>
+ * then <code>newXSampleData(x) </code> results in a model with two independent
+ * variables and 3 observations:
+ * <pre>
+ * x[0] x[1]
+ * ----------
+ * 1 2
+ * 3 4
+ * 5 6
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * @param x the rectangular array representing the x sample
+ * @throws IllegalArgumentException if x is null, empty or not rectangular
+ */
+ protected void newXSampleData(double[][] x) {
+ if (x == null) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NULL_NOT_ALLOWED);
+ }
+ if (x.length == 0) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ // No intercept: the supplied columns are the whole design matrix
+ if (noIntercept) {
+ this.X = new Array2DRowRealMatrix(x, true);
+ return;
+ }
+ // Intercept: prepend a column of ones, checking that every row is as wide as the first
+ final int width = x[0].length;
+ final double[][] design = new double[x.length][width + 1];
+ int row = 0;
+ for (final double[] observation : x) {
+ if (observation.length != width) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
+ observation.length, width);
+ }
+ design[row][0] = 1.0d;
+ System.arraycopy(observation, 0, design[row], 1, width);
+ row++;
+ }
+ this.X = new Array2DRowRealMatrix(design, false);
+ }
+
+ /**
+ * Validates sample data. Checks that
+ * <ul><li>Neither x nor y is null or empty;</li>
+ * <li>The length (i.e. number of rows) of x equals the length of y</li>
+ * <li>x has at least one more row than it has columns (i.e. there is
+ * sufficient data to estimate regression coefficients for each of the
+ * columns in x plus an intercept).</li>
+ * </ul>
+ *
+ * @param x the [n,k] array representing the x data
+ * @param y the [n,1] array representing the y data
+ * @throws IllegalArgumentException if any of the checks fail
+ *
+ */
+ protected void validateSampleData(double[][] x, double[] y) {
+ if ((x == null) || (y == null) || (x.length != y.length)) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
+ (x == null) ? 0 : x.length,
+ (y == null) ? 0 : y.length);
+ }
+ if (x.length == 0) { // Must be no y data either
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NO_DATA);
+ }
+ if (x[0].length + 1 > x.length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ x.length, x[0].length);
+ }
+ }
+
+ /**
+ * Validates that the x data and covariance matrix have the same
+ * number of rows and that the covariance matrix is square.
+ *
+ * @param x the [n,k] array representing the x sample
+ * @param covariance the [n,n] array representing the covariance matrix
+ * @throws IllegalArgumentException if the number of rows in x is not equal
+ * to the number of rows in covariance or covariance is not square.
+ */
+ protected void validateCovarianceData(double[][] x, double[][] covariance) {
+ if (x.length != covariance.length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, x.length, covariance.length);
+ }
+ if (covariance.length > 0 && covariance.length != covariance[0].length) {
+ throw MathRuntimeException.createIllegalArgumentException(
+ LocalizedFormats.NON_SQUARE_MATRIX,
+ covariance.length, covariance[0].length);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateRegressionParameters() {
+ // Expose the estimated coefficient vector as a plain array
+ return calculateBeta().getData();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateResiduals() {
+ final RealVector beta = calculateBeta();
+ // Fitted values X * b, then residuals y - X * b
+ final RealVector fitted = X.operate(beta);
+ return Y.subtract(fitted).getData();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[][] estimateRegressionParametersVariance() {
+ return calculateBetaVariance().getData();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateRegressionParametersStandardErrors() {
+ double[][] betaVariance = estimateRegressionParametersVariance();
+ double sigma = calculateErrorVariance();
+ int length = betaVariance[0].length;
+ double[] result = new double[length];
+ for (int i = 0; i < length; i++) {
+ result[i] = FastMath.sqrt(sigma * betaVariance[i][i]);
+ }
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double estimateRegressandVariance() {
+ return calculateYVariance();
+ }
+
+ /**
+ * Estimates the variance of the error.
+ *
+ * @return estimate of the error variance
+ * @since 2.2
+ */
+ public double estimateErrorVariance() {
+ return calculateErrorVariance();
+
+ }
+
+ /**
+ * Estimates the standard error of the regression.
+ *
+ * @return regression standard error
+ * @since 2.2
+ */
+ public double estimateRegressionStandardError() {
+ // Use FastMath.sqrt, as estimateRegressionParametersStandardErrors() does,
+ // so the class relies on a single square-root implementation.
+ return FastMath.sqrt(estimateErrorVariance());
+ }
+
+ /**
+ * Calculates the beta of multiple linear regression in matrix notation.
+ *
+ * @return beta
+ */
+ protected abstract RealVector calculateBeta();
+
+ /**
+ * Calculates the beta variance of multiple linear regression in matrix
+ * notation.
+ *
+ * @return beta variance
+ */
+ protected abstract RealMatrix calculateBetaVariance();
+
+
+ /**
+ * Calculates the variance of the y values.
+ *
+ * @return Y variance
+ */
+ protected double calculateYVariance() {
+ return new Variance().evaluate(Y.getData());
+ }
+
+ /**
+ * <p>Calculates the variance of the error term.</p>
+ * Uses the formula <pre>
+ * var(u) = u &middot; u / (n - k)
+ * </pre>
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance estimate
+ * @since 2.2
+ */
+ protected double calculateErrorVariance() {
+ RealVector residuals = calculateResiduals();
+ return residuals.dotProduct(residuals) /
+ (X.getRowDimension() - X.getColumnDimension());
+ }
+
+ /**
+ * Calculates the residuals of multiple linear regression in matrix
+ * notation.
+ *
+ * <pre>
+ * u = y - X * b
+ * </pre>
+ *
+ * @return The residuals [n,1] matrix
+ */
+ protected RealVector calculateResiduals() {
+ RealVector b = calculateBeta();
+ return Y.subtract(X.operate(b));
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java b/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
new file mode 100644
index 0000000..dc6ef0d
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/GLSMultipleLinearRegression.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.linear.LUDecompositionImpl;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.linear.RealVector;
+
+/**
+ * The GLS implementation of the multiple linear regression.
+ *
+ * GLS assumes a general covariance matrix Omega of the error
+ * <pre>
+ * u ~ N(0, Omega)
+ * </pre>
+ *
+ * Estimated by GLS,
+ * <pre>
+ * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+ * </pre>
+ * whose variance is
+ * <pre>
+ * Var(b)=(X' Omega^-1 X)^-1
+ * </pre>
+ * @version $Revision: 1073460 $ $Date: 2011-02-22 20:22:39 +0100 (mar. 22 févr. 2011) $
+ * @since 2.0
+ */
+public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+ /** Covariance matrix. */
+ private RealMatrix Omega;
+
+ /** Inverse of covariance matrix. */
+ private RealMatrix OmegaInverse;
+
+ /** Replace sample data, overriding any previous sample.
+ * @param y y values of the sample
+ * @param x x values of the sample
+ * @param covariance array representing the covariance matrix
+ */
+ public void newSampleData(double[] y, double[][] x, double[][] covariance) {
+ // x and y are checked together before anything is stored
+ validateSampleData(x, y);
+ newYSampleData(y);
+ newXSampleData(x);
+ // The covariance is validated only after Y and X have been replaced,
+ // so a rejected covariance leaves the new Y and X in place.
+ validateCovarianceData(x, covariance);
+ newCovarianceData(covariance);
+ }
+
+ /**
+ * Add the covariance data.
+ *
+ * @param omega the [n,n] array representing the covariance
+ */
+ protected void newCovarianceData(double[][] omega){
+ this.Omega = new Array2DRowRealMatrix(omega);
+ // Discard the cached inverse so getOmegaInverse() recomputes it from the new Omega
+ this.OmegaInverse = null;
+ }
+
+ /**
+ * Get the inverse of the covariance.
+ * <p>The inverse of the covariance matrix is lazily evaluated and cached.</p>
+ * @return inverse of the covariance
+ */
+ protected RealMatrix getOmegaInverse() {
+ RealMatrix inverse = OmegaInverse;
+ if (inverse == null) {
+ // First request since the covariance was set: invert via LU and remember the result
+ inverse = new LUDecompositionImpl(Omega).getSolver().getInverse();
+ OmegaInverse = inverse;
+ }
+ return inverse;
+ }
+
+ /**
+ * Calculates beta by GLS.
+ * <pre>
+ * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+ * </pre>
+ * @return beta
+ */
+ @Override
+ protected RealVector calculateBeta() {
+ RealMatrix OI = getOmegaInverse();
+ RealMatrix XT = X.transpose();
+ RealMatrix XTOIX = XT.multiply(OI).multiply(X);
+ RealMatrix inverse = new LUDecompositionImpl(XTOIX).getSolver().getInverse();
+ return inverse.multiply(XT).multiply(OI).operate(Y);
+ }
+
+ /**
+ * Calculates the variance on the beta.
+ * <pre>
+ * Var(b)=(X' Omega^-1 X)^-1
+ * </pre>
+ * @return The beta variance matrix
+ */
+ @Override
+ protected RealMatrix calculateBetaVariance() {
+ RealMatrix OI = getOmegaInverse();
+ RealMatrix XTOIX = X.transpose().multiply(OI).multiply(X);
+ return new LUDecompositionImpl(XTOIX).getSolver().getInverse();
+ }
+
+
+ /**
+ * Calculates the estimated variance of the error term using the formula
+ * <pre>
+ * Var(u) = Tr(u' Omega^-1 u)/(n-k)
+ * </pre>
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance
+ * @since 2.2
+ */
+ @Override
+ protected double calculateErrorVariance() {
+ RealVector residuals = calculateResiduals();
+ double t = residuals.dotProduct(getOmegaInverse().operate(residuals));
+ return t / (X.getRowDimension() - X.getColumnDimension());
+
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java b/src/main/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java
new file mode 100644
index 0000000..b7aabd4
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/MultipleLinearRegression.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+/**
+ * The multiple linear regression can be represented in matrix-notation.
+ * <pre>
+ * y=X*b+u
+ * </pre>
+ * where y is an <code>n-vector</code> <b>regressand</b>, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
+ * <b>regressors</b>, b is <code>k-vector</code> of <b>regression parameters</b> and <code>u</code> is an <code>n-vector</code>
+ * of <b>error terms</b> or <b>residuals</b>.
+ *
+ * The notation is quite standard in literature,
+ * cf eg <a href="http://www.econ.queensu.ca/ETM">Davidson and MacKinnon, Econometrics Theory and Methods, 2004</a>.
+ * @version $Revision: 811685 $ $Date: 2009-09-05 19:36:48 +0200 (sam. 05 sept. 2009) $
+ * @since 2.0
+ */
+public interface MultipleLinearRegression {
+
+ /**
+ * Estimates the regression parameters b.
+ *
+ * @return The [k,1] array representing b
+ */
+ double[] estimateRegressionParameters();
+
+ /**
+ * Estimates the variance of the regression parameters, i.e. Var(b).
+ *
+ * @return The [k,k] array representing the variance of b
+ */
+ double[][] estimateRegressionParametersVariance();
+
+ /**
+ * Estimates the residuals, i.e. u = y - X*b.
+ *
+ * @return The [n,1] array representing the residuals
+ */
+ double[] estimateResiduals();
+
+ /**
+ * Returns the variance of the regressand, i.e. Var(y).
+ *
+ * @return The double representing the variance of y
+ */
+ double estimateRegressandVariance();
+
+ /**
+ * Returns the standard errors of the regression parameters.
+ *
+ * @return standard errors of estimated regression parameters
+ */
+ double[] estimateRegressionParametersStandardErrors();
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java b/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
new file mode 100644
index 0000000..22a59e8
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/OLSMultipleLinearRegression.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.regression;
+
+import org.apache.commons.math.linear.Array2DRowRealMatrix;
+import org.apache.commons.math.linear.LUDecompositionImpl;
+import org.apache.commons.math.linear.QRDecomposition;
+import org.apache.commons.math.linear.QRDecompositionImpl;
+import org.apache.commons.math.linear.RealMatrix;
+import org.apache.commons.math.linear.RealVector;
+import org.apache.commons.math.stat.StatUtils;
+import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
+
+/**
+ * <p>Implements ordinary least squares (OLS) to estimate the parameters of a
+ * multiple linear regression model.</p>
+ *
+ * <p>The regression coefficients, <code>b</code>, satisfy the normal equations:
+ * <pre><code> X<sup>T</sup> X b = X<sup>T</sup> y </code></pre></p>
+ *
+ * <p>To solve the normal equations, this implementation uses QR decomposition
+ * of the <code>X</code> matrix. (See {@link QRDecompositionImpl} for details on the
+ * decomposition algorithm.) The <code>X</code> matrix, also known as the <i>design matrix,</i>
+ * has rows corresponding to sample observations and columns corresponding to independent
+ * variables. When the model is estimated using an intercept term (i.e. when
+ * {@link #isNoIntercept() isNoIntercept} is false as it is by default), the <code>X</code>
+ * matrix includes an initial column identically equal to 1. We solve the normal equations
+ * as follows:
+ * <pre><code> X<sup>T</sup>X b = X<sup>T</sup> y
+ * (QR)<sup>T</sup> (QR) b = (QR)<sup>T</sup>y
+ * R<sup>T</sup> (Q<sup>T</sup>Q) R b = R<sup>T</sup> Q<sup>T</sup> y
+ * R<sup>T</sup> R b = R<sup>T</sup> Q<sup>T</sup> y
+ * (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> R b = (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> Q<sup>T</sup> y
+ * R b = Q<sup>T</sup> y </code></pre></p>
+ *
+ * <p>Given <code>Q</code> and <code>R</code>, the last equation is solved by back-substitution.</p>
+ *
+ * @version $Revision: 1073464 $ $Date: 2011-02-22 20:35:02 +0100 (mar. 22 févr. 2011) $
+ * @since 2.0
+ */
+public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+ /** Cached QR decomposition of X matrix */
+ private QRDecomposition qr = null;
+
+ /**
+ * Loads model x and y sample data, overriding any previous sample.
+ *
+ * Computes and caches QR decomposition of the X matrix.
+ * @param y the [n,1] array representing the y sample
+ * @param x the [n,k] array representing the x sample
+ * @throws IllegalArgumentException if the x and y array data are not
+ * compatible for the regression
+ */
+ public void newSampleData(double[] y, double[][] x) {
+ // x and y are checked together before anything is stored
+ validateSampleData(x, y);
+ newYSampleData(y);
+ // newXSampleData is overridden in this class to rebuild the cached QR decomposition
+ newXSampleData(x);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix.</p>
+ */
+ @Override
+ public void newSampleData(double[] data, int nobs, int nvars) {
+ super.newSampleData(data, nobs, nvars);
+ qr = new QRDecompositionImpl(X);
+ }
+
+ /**
+ * <p>Compute the "hat" matrix.
+ * </p>
+ * <p>The hat matrix is defined in terms of the design matrix X
+ * by X(X<sup>T</sup>X)<sup>-1</sup>X<sup>T</sup>
+ * </p>
+ * <p>The implementation here uses the QR decomposition to compute the
+ * hat matrix as Q I<sub>p</sub>Q<sup>T</sup> where I<sub>p</sub> is the
+ * p-dimensional identity matrix augmented by 0's. This computational
+ * formula is from "The Hat Matrix in Regression and ANOVA",
+ * David C. Hoaglin and Roy E. Welsch,
+ * <i>The American Statistician</i>, Vol. 32, No. 1 (Feb., 1978), pp. 17-22.</p>
+ *
+ * @return the hat matrix
+ */
+ public RealMatrix calculateHat() {
+ final RealMatrix q = qr.getQ();
+ final int p = qr.getR().getColumnDimension();
+ final int n = q.getColumnDimension();
+
+ // n x n matrix with ones on the first p diagonal entries and zeros elsewhere
+ final Array2DRowRealMatrix truncatedIdentity = new Array2DRowRealMatrix(n, n);
+ final double[][] cells = truncatedIdentity.getDataRef();
+ for (int row = 0; row < n; row++) {
+ for (int col = 0; col < n; col++) {
+ cells[row][col] = (row == col && row < p) ? 1d : 0d;
+ }
+ }
+
+ // Hat matrix = Q * I_p * Q^T
+ final RealMatrix qTimesIdentity = q.multiply(truncatedIdentity);
+ return qTimesIdentity.multiply(q.transpose());
+ }
+
+ /**
+ * <p>Returns the sum of squared deviations of Y from its mean.</p>
+ *
+ * <p>If the model has no intercept term, <code>0</code> is used for the
+ * mean of Y - i.e., what is returned is the sum of the squared Y values.</p>
+ *
+ * <p>The value returned by this method is the SSTO value used in
+ * the {@link #calculateRSquared() R-squared} computation.</p>
+ *
+ * @return SSTO - the total sum of squares
+ * @see #isNoIntercept()
+ * @since 2.2
+ */
+ public double calculateTotalSumOfSquares() {
+ // Without an intercept the squares are taken about 0 rather than about the mean
+ if (!isNoIntercept()) {
+ return new SecondMoment().evaluate(Y.getData());
+ }
+ return StatUtils.sumSq(Y.getData());
+ }
+
+ /**
+ * Returns the sum of squared residuals.
+ *
+ * @return residual sum of squares
+ * @since 2.2
+ */
+ public double calculateResidualSumOfSquares() {
+ final RealVector u = calculateResiduals();
+ // u . u
+ final double sumOfSquares = u.dotProduct(u);
+ return sumOfSquares;
+ }
+
+ /**
+ * Returns the R-Squared statistic, defined by the formula <pre>
+ * R<sup>2</sup> = 1 - SSR / SSTO
+ * </pre>
+ * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals}
+ * and SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}
+ *
+ * @return R-square statistic
+ * @since 2.2
+ */
+ public double calculateRSquared() {
+ return 1 - calculateResidualSumOfSquares() / calculateTotalSumOfSquares();
+ }
+
+ /**
+ * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
+ * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
+ * </pre>
+ * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
+ * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the number
+ * of observations and p is the number of parameters estimated (including the intercept).</p>
+ *
+ * <p>If the regression is estimated without an intercept term, what is returned is <pre>
+ * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
+ * </pre></p>
+ *
+ * @return adjusted R-Squared statistic
+ * @see #isNoIntercept()
+ * @since 2.2
+ */
+ public double calculateAdjustedRSquared() {
+ final double n = X.getRowDimension();
+ if (!isNoIntercept()) {
+ // 1 - [SSR (n - 1)] / [SSTO (n - p)]
+ final double ssr = calculateResidualSumOfSquares();
+ final double ssto = calculateTotalSumOfSquares();
+ return 1 - (ssr * (n - 1)) / (ssto * (n - X.getColumnDimension()));
+ }
+ // No intercept: 1 - (1 - R^2) * (n / (n - p))
+ final double unexplained = 1 - calculateRSquared();
+ return 1 - unexplained * (n / (n - X.getColumnDimension()));
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix
+ * once it is successfully loaded.</p>
+ */
+ @Override
+ protected void newXSampleData(double[][] x) {
+ super.newXSampleData(x);
+ qr = new QRDecompositionImpl(X);
+ }
+
+ /**
+ * Calculates the regression coefficients using OLS.
+ *
+ * @return beta
+ */
+ @Override
+ protected RealVector calculateBeta() {
+ return qr.getSolver().solve(Y);
+ }
+
+ /**
+ * <p>Calculates the variance-covariance matrix of the regression parameters.
+ * </p>
+ * <p>Var(b) = (X<sup>T</sup>X)<sup>-1</sup>
+ * </p>
+ * <p>Uses QR decomposition to reduce (X<sup>T</sup>X)<sup>-1</sup>
+ * to (R<sup>T</sup>R)<sup>-1</sup>, with only the top p rows of
+ * R included, where p = the length of the beta vector.</p>
+ *
+ * @return The beta variance-covariance matrix
+ */
+ @Override
+ protected RealMatrix calculateBetaVariance() {
+ int p = X.getColumnDimension();
+ RealMatrix Raug = qr.getR().getSubMatrix(0, p - 1 , 0, p - 1);
+ RealMatrix Rinv = new LUDecompositionImpl(Raug).getSolver().getInverse();
+ return Rinv.multiply(Rinv.transpose());
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math/stat/regression/SimpleRegression.java b/src/main/java/org/apache/commons/math/stat/regression/SimpleRegression.java
new file mode 100644
index 0000000..d950541
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/SimpleRegression.java
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.regression;
+import java.io.Serializable;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.MathRuntimeException;
+import org.apache.commons.math.distribution.TDistribution;
+import org.apache.commons.math.distribution.TDistributionImpl;
+import org.apache.commons.math.exception.util.LocalizedFormats;
+import org.apache.commons.math.util.FastMath;
+
+/**
+ * Estimates an ordinary least squares regression model
+ * with one independent variable.
+ * <p>
+ * <code> y = intercept + slope * x </code></p>
+ * <p>
+ * Standard errors for <code>intercept</code> and <code>slope</code> are
+ * available as well as ANOVA, r-square and Pearson's r statistics.</p>
+ * <p>
+ * Observations (x,y pairs) can be added to the model one at a time or they
+ * can be provided in a 2-dimensional array. The observations are not stored
+ * in memory, so there is no limit to the number of observations that can be
+ * added to the model.</p>
+ * <p>
+ * <strong>Usage Notes</strong>: <ul>
+ * <li> When there are fewer than two observations in the model, or when
+ * there is no variation in the x values (i.e. all x values are the same)
+ * all statistics return <code>NaN</code>. At least two observations with
+ * different x coordinates are required to estimate a bivariate regression
+ * model.
+ * </li>
+ * <li> getters for the statistics always compute values based on the current
+ * set of observations -- i.e., you can get statistics, then add more data
+ * and get updated statistics without using a new instance. There is no
+ * "compute" method that updates all statistics. Each of the getters performs
+ * the necessary computations to return the requested statistic.</li>
+ * </ul></p>
+ *
+ * @version $Revision: 1042336 $ $Date: 2010-12-05 13:40:48 +0100 (dim. 05 déc. 2010) $
+ */
+public class SimpleRegression implements Serializable {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -3004689053607543335L;
+
+ /**
+ * the distribution used to compute inference statistics; its degrees of
+ * freedom are set to n - 2 whenever n exceeds 2.
+ */
+ private TDistribution distribution;
+
+ /** sum of x values */
+ private double sumX = 0d;
+
+ /** total variation in x (sum of squared deviations from xbar) */
+ private double sumXX = 0d;
+
+ /** sum of y values */
+ private double sumY = 0d;
+
+ /** total variation in y (sum of squared deviations from ybar) */
+ private double sumYY = 0d;
+
+ /** sum of products of the x and y deviations from xbar and ybar */
+ private double sumXY = 0d;
+
+ /** number of observations */
+ private long n = 0;
+
+ /** mean of accumulated x values, used in updating formulas */
+ private double xbar = 0;
+
+ /** mean of accumulated y values, used in updating formulas */
+ private double ybar = 0;
+
+ // ---------------------Public methods--------------------------------------
+
+ /**
+ * Create an empty SimpleRegression instance.
+ * <p>
+ * Delegates to the distribution-taking constructor with a
+ * <code>TDistributionImpl</code> created with 1.0 degrees of freedom.</p>
+ */
+ public SimpleRegression() {
+ this(new TDistributionImpl(1.0));
+ }
+
+ /**
+ * Builds an empty SimpleRegression whose inference statistics are computed
+ * with the supplied distribution object.
+ *
+ * @param t the distribution used to compute inference statistics.
+ * @since 1.2
+ * @deprecated in 2.2 (to be removed in 3.0). Please use the {@link
+ * #SimpleRegression(int) other constructor} instead.
+ */
+ @Deprecated
+ public SimpleRegression(TDistribution t) {
+     // The implicit superclass constructor call is sufficient here.
+     setDistribution(t);
+ }
+
+ /**
+ * Create an empty SimpleRegression.
+ * <p>
+ * A new <code>TDistributionImpl</code> with the given degrees of freedom is
+ * installed through {@link #setDistribution(TDistribution)}.</p>
+ *
+ * @param degrees Number of degrees of freedom of the distribution
+ * used to compute inference statistics.
+ * @since 2.2
+ */
+ public SimpleRegression(int degrees) {
+ setDistribution(new TDistributionImpl(degrees));
+ }
+
+ /**
+ * Incorporates the observation (x,y) into the regression data set.
+ * <p>
+ * The means and sums of squares are maintained with the updating formulas
+ * from "Algorithms for Computing the Sample Variance: Analysis and
+ * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
+ * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
+ * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.</p>
+ * <p>
+ * Once more than two observations are present, the degrees of freedom of
+ * the inference distribution are set to <code>n - 2</code>.</p>
+ *
+ * @param x independent variable value
+ * @param y dependent variable value
+ */
+ public void addData(double x, double y) {
+     if (n != 0) {
+         // Observation counts before and after this addition, as doubles.
+         final double before = n;
+         final double after = n + 1d;
+         final double deltaX = x - xbar;
+         final double deltaY = y - ybar;
+         sumXX += deltaX * deltaX * before / after;
+         sumYY += deltaY * deltaY * before / after;
+         sumXY += deltaX * deltaY * before / after;
+         xbar += deltaX / after;
+         ybar += deltaY / after;
+     } else {
+         // First observation: the running means are the point itself.
+         xbar = x;
+         ybar = y;
+     }
+     sumX += x;
+     sumY += y;
+     n++;
+
+     if (n > 2) {
+         distribution.setDegreesOfFreedom(n - 2);
+     }
+ }
+
+
+ /**
+ * Removes the observation (x,y) from the regression data set.
+ * <p>
+ * Mirrors the addData method. This method permits the use of
+ * SimpleRegression instances in streaming mode where the regression
+ * is applied to a sliding "window" of observations, however the caller is
+ * responsible for maintaining the set of observations in the window.</p>
+ * <p>
+ * The method has no effect if there are no points of data (i.e. n=0).
+ * Removing the only remaining observation (n=1) resets the accumulated
+ * sums and means to their empty state, so that observations added
+ * afterwards are not contaminated by <code>NaN</code> values.</p>
+ *
+ * @param x independent variable value
+ * @param y dependent variable value
+ */
+ public void removeData(double x, double y) {
+     if (n <= 0) {
+         return;
+     }
+     if (n == 1) {
+         // The downdating formulas divide by (n - 1), which is zero here and
+         // would turn the sums of squares into NaN permanently. An empty
+         // model has all accumulators at zero, so reset them directly.
+         sumX = 0d;
+         sumXX = 0d;
+         sumY = 0d;
+         sumYY = 0d;
+         sumXY = 0d;
+         xbar = 0;
+         ybar = 0;
+         n = 0;
+         return;
+     }
+     final double dx = x - xbar;
+     final double dy = y - ybar;
+     sumXX -= dx * dx * (double) n / (n - 1d);
+     sumYY -= dy * dy * (double) n / (n - 1d);
+     sumXY -= dx * dy * (double) n / (n - 1d);
+     xbar -= dx / (n - 1.0);
+     ybar -= dy / (n - 1.0);
+     sumX -= x;
+     sumY -= y;
+     n--;
+
+     if (n > 2) {
+         distribution.setDegreesOfFreedom(n - 2);
+     }
+ }
+
+ /**
+ * Adds every observation held in <code>data</code>.
+ * <p>
+ * Rows are consumed in order: <code>(data[0][0],data[0][1])</code> is the
+ * first observation, <code>(data[1][0],data[1][1])</code> the second, and
+ * so on.</p>
+ * <p>
+ * Existing observations are kept; the rows of <code>data</code> are added
+ * on top of the current dataset.</p>
+ * <p>
+ * To replace all data, call <code>clear()</code> before adding the new
+ * data.</p>
+ *
+ * @param data array of observations to be added
+ */
+ public void addData(double[][] data) {
+     for (final double[] observation : data) {
+         addData(observation[0], observation[1]);
+     }
+ }
+
+
+ /**
+ * Removes the observations held in <code>data</code>.
+ * <p>
+ * Processing stops as soon as the model is empty, so if the array holds
+ * more rows than the current n, only the first n rows are used. This
+ * method permits the use of SimpleRegression instances in streaming mode
+ * where the regression is applied to a sliding "window" of observations,
+ * however the caller is responsible for maintaining the set of
+ * observations in the window.</p>
+ * <p>
+ * To remove all data, use <code>clear()</code>.</p>
+ *
+ * @param data array of observations to be removed
+ */
+ public void removeData(double[][] data) {
+     for (final double[] observation : data) {
+         if (n <= 0) {
+             // Nothing left in the model; ignore the remaining rows.
+             break;
+         }
+         removeData(observation[0], observation[1]);
+     }
+ }
+
+ /**
+ * Discards all observations by zeroing the observation count and the
+ * accumulated sums.
+ */
+ public void clear() {
+     n = 0;
+     sumX = 0d;
+     sumY = 0d;
+     sumXX = 0d;
+     sumYY = 0d;
+     sumXY = 0d;
+ }
+
+ /**
+ * Returns the number of observations currently in the model.
+ * <p>
+ * The count is incremented by each added observation, decremented by each
+ * removed one, and reset to zero by <code>clear()</code>.</p>
+ *
+ * @return n number of observations currently in the model.
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * Returns the "predicted" <code>y</code> value for the supplied
+ * <code>x</code> value, using the data present in the model at the time
+ * of the call.
+ * <p>
+ * <code> predict(x) = intercept + slope * x </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @param x input <code>x</code> value
+ * @return predicted <code>y</code> value
+ */
+ public double predict(double x) {
+     // Compute the slope once and reuse it for the intercept.
+     final double slope = getSlope();
+     final double intercept = getIntercept(slope);
+     return intercept + slope * x;
+ }
+
+ /**
+ * Returns the intercept of the estimated regression line.
+ * <p>
+ * The least squares estimate of the intercept is computed using the
+ * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
+ * The intercept is sometimes denoted b0.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return the intercept of the regression line
+ */
+ public double getIntercept() {
+     final double slope = getSlope();
+     return getIntercept(slope);
+ }
+
+ /**
+ * Returns the slope of the estimated regression line.
+ * <p>
+ * The least squares estimate of the slope is computed using the
+ * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
+ * The slope is sometimes denoted b1.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return the slope of the regression line
+ */
+ public double getSlope() {
+     final boolean notEnoughData = n < 2;
+     // The second operand is only evaluated when there is enough data; it
+     // detects a (numerically) absent variation in x.
+     if (notEnoughData || FastMath.abs(sumXX) < 10 * Double.MIN_VALUE) {
+         return Double.NaN;
+     }
+     return sumXY / sumXX;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
+ * sum of squared errors</a> (SSE) associated with the regression
+ * model.
+ * <p>
+ * The sum is computed using the computational formula</p>
+ * <p>
+ * <code>SSE = SYY - (SXY * SXY / SXX)</code></p>
+ * <p>
+ * where <code>SYY</code> is the sum of the squared deviations of the y
+ * values about their mean, <code>SXX</code> is similarly defined and
+ * <code>SXY</code> is the sum of the products of x and y mean deviations.
+ * </p><p>
+ * The sums are accumulated using the updating algorithm referenced in
+ * {@link #addData}.</p>
+ * <p>
+ * The return value is constrained to be non-negative - i.e., if due to
+ * rounding errors the computational formula returns a negative result,
+ * 0 is returned.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared errors associated with the regression model
+ */
+ public double getSumSquaredErrors() {
+     // SXY * SXY / SXX, the part of SYY accounted for by the regression.
+     final double explained = sumXY * sumXY / sumXX;
+     return FastMath.max(0d, sumYY - explained);
+ }
+
+ /**
+ * Returns the sum of squared deviations of the y values about their mean.
+ * <p>
+ * This is defined as SSTO
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
+ * <p>
+ * If <code>n &lt; 2</code>, this returns <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of y values
+ */
+ public double getTotalSumSquares() {
+     return n < 2 ? Double.NaN : sumYY;
+ }
+
+ /**
+ * Returns the sum of squared deviations of the x values about their mean.
+ * <p>
+ * If <code>n &lt; 2</code>, this returns <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of x values
+ */
+ public double getXSumSquares() {
+     return n < 2 ? Double.NaN : sumXX;
+ }
+
+ /**
+ * Returns the accumulated sum of cross products.
+ * <p>
+ * The value returned is the one maintained by the updating formulas in
+ * {@link #addData(double, double)}, i.e. the sum of the products of the
+ * x and y deviations from their means, not the raw sum of
+ * x<sub>i</sub>*y<sub>i</sub>.</p>
+ *
+ * @return sum of cross products
+ */
+ public double getSumOfCrossProducts() {
+ return sumXY;
+ }
+
+ /**
+ * Returns the sum of squared deviations of the predicted y values about
+ * their mean (which equals the mean of y).
+ * <p>
+ * This is usually abbreviated SSR or SSM. It is defined as SSM
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared deviations of predicted y values
+ */
+ public double getRegressionSumSquares() {
+     final double slope = getSlope();
+     return getRegressionSumSquares(slope);
+ }
+
+ /**
+ * Returns the sum of squared errors divided by the degrees of freedom
+ * (<code>n - 2</code>), usually abbreviated MSE.
+ * <p>
+ * If there are fewer than <strong>three</strong> data pairs in the model,
+ * or if there is no variation in <code>x</code>, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return mean square error of the regression
+ */
+ public double getMeanSquareError() {
+     return n < 3 ? Double.NaN : getSumSquaredErrors() / (n - 2);
+ }
+
+ /**
+ * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
+ * Pearson's product moment correlation coefficient</a>,
+ * usually denoted r.
+ * <p>
+ * The magnitude is the square root of r-square and the sign is that of
+ * the slope.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return Pearson's r
+ */
+ public double getR() {
+     final double slope = getSlope();
+     final double magnitude = FastMath.sqrt(getRSquare());
+     return slope < 0 ? -magnitude : magnitude;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
+ * coefficient of determination</a>,
+ * usually denoted r-square.
+ * <p>
+ * Computed as <code>(SSTO - SSE) / SSTO</code>.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return r-square
+ */
+ public double getRSquare() {
+     final double total = getTotalSumSquares();
+     final double explained = total - getSumSquaredErrors();
+     return explained / total;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
+ * standard error of the intercept estimate</a>,
+ * usually denoted s(b0).
+ * <p>
+ * If there are fewer than <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return standard error associated with intercept estimate
+ */
+ public double getInterceptStdErr() {
+     final double mse = getMeanSquareError();
+     // 1/n + xbar^2 / SXX
+     final double scale = (1d / (double) n) + (xbar * xbar) / sumXX;
+     return FastMath.sqrt(mse * scale);
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+ * error of the slope estimate</a>,
+ * usually denoted s(b1).
+ * <p>
+ * If there are fewer than <strong>three</strong> data pairs in the model,
+ * or if there is no variation in x, this returns <code>Double.NaN</code>.
+ * </p>
+ *
+ * @return standard error associated with slope estimate
+ */
+ public double getSlopeStdErr() {
+     final double mse = getMeanSquareError();
+     return FastMath.sqrt(mse / sumXX);
+ }
+
+ /**
+ * Returns the half-width of a 95% confidence interval for the slope
+ * estimate.
+ * <p>
+ * The 95% confidence interval is</p>
+ * <p>
+ * <code>(getSlope() - getSlopeConfidenceInterval(),
+ * getSlope() + getSlopeConfidenceInterval())</code></p>
+ * <p>
+ * If there are fewer than <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p>
+ * <p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ *
+ * @return half-width of 95% confidence interval for the slope estimate
+ * @throws MathException if the confidence interval can not be computed.
+ */
+ public double getSlopeConfidenceInterval() throws MathException {
+ // A 95% interval corresponds to a significance level of 0.05.
+ return getSlopeConfidenceInterval(0.05d);
+ }
+
+ /**
+ * Returns the half-width of a (100-100*alpha)% confidence interval for
+ * the slope estimate.
+ * <p>
+ * The (100-100*alpha)% confidence interval is </p>
+ * <p>
+ * <code>(getSlope() - getSlopeConfidenceInterval(alpha),
+ * getSlope() + getSlopeConfidenceInterval(alpha))</code></p>
+ * <p>
+ * To request, for example, a 99% confidence interval, use
+ * <code>alpha = .01</code></p>
+ * <p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ * <p>
+ * <strong> Preconditions:</strong><ul>
+ * <li>If there are fewer than <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.
+ * </li>
+ * <li><code>(0 &lt; alpha &lt; 1)</code>; otherwise (including when
+ * <code>alpha</code> is <code>NaN</code>) an
+ * <code>IllegalArgumentException</code> is thrown.
+ * </li></ul></p>
+ *
+ * @param alpha the desired significance level
+ * @return half-width of the (100-100*alpha)% confidence interval for the
+ * slope estimate
+ * @throws MathException if the confidence interval can not be computed.
+ * @throws IllegalArgumentException if <code>alpha</code> is not strictly
+ * between 0 and 1
+ */
+ public double getSlopeConfidenceInterval(double alpha)
+     throws MathException {
+     // Written as a negated range test so that NaN, for which every
+     // comparison is false, is rejected like any other out-of-range value.
+     if (!(alpha > 0 && alpha < 1)) {
+         throw MathRuntimeException.createIllegalArgumentException(
+             LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
+             alpha, 0.0, 1.0);
+     }
+     // Half-width = s(b1) * t quantile at 1 - alpha/2.
+     return getSlopeStdErr() *
+         distribution.inverseCumulativeProbability(1d - alpha / 2d);
+ }
+
+ /**
+ * Returns the significance level of the slope (equiv) correlation.
+ * <p>
+ * Specifically, the returned value is the smallest <code>alpha</code>
+ * such that the slope confidence interval with significance level
+ * equal to <code>alpha</code> does not include <code>0</code>.
+ * On regression output, this is often denoted <code>Prob(|t| &gt; 0)</code>
+ * </p><p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ * <p>
+ * If there are fewer than <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return significance level for slope/correlation
+ * @throws MathException if the significance level can not be computed.
+ */
+ public double getSignificance() throws MathException {
+     // |b1| / s(b1)
+     final double tStatistic = FastMath.abs(getSlope()) / getSlopeStdErr();
+     // Two-sided tail probability.
+     return 2d * (1.0 - distribution.cumulativeProbability(tStatistic));
+ }
+
+ // ---------------------Private methods-----------------------------------
+
+ /**
+ * Computes the intercept of the estimated regression line from a given
+ * slope, as <code>(sumY - slope * sumX) / n</code>.
+ * <p>
+ * Will return <code>NaN</code> if slope is <code>NaN</code>.</p>
+ *
+ * @param slope current slope
+ * @return the intercept of the regression line
+ */
+ private double getIntercept(double slope) {
+     final double adjustedSum = sumY - slope * sumX;
+     return adjustedSum / n;
+ }
+
+ /**
+ * Computes SSR from b1, as <code>slope * slope * sumXX</code>.
+ *
+ * @param slope regression slope estimate
+ * @return sum of squared deviations of predicted y values
+ */
+ private double getRegressionSumSquares(double slope) {
+ return slope * slope * sumXX;
+ }
+
+ /**
+ * Replaces the distribution used to compute inference statistics.
+ * <p>
+ * When the model already holds more than two observations, the degrees
+ * of freedom of the new distribution are set to <code>n - 2</code>.</p>
+ *
+ * @param value the new distribution
+ * @since 1.2
+ * @deprecated in 2.2 (to be removed in 3.0).
+ */
+ @Deprecated
+ public void setDistribution(TDistribution value) {
+     distribution = value;
+
+     // Align the degrees of freedom with the data already in the model.
+     final boolean hasEnoughData = n > 2;
+     if (hasEnoughData) {
+         distribution.setDegreesOfFreedom(n - 2);
+     }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math/stat/regression/package.html b/src/main/java/org/apache/commons/math/stat/regression/package.html
new file mode 100644
index 0000000..2538c6e
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/regression/package.html
@@ -0,0 +1,22 @@
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+ <!-- $Revision: 480440 $ $Date: 2006-11-29 08:14:12 +0100 (mer. 29 nov. 2006) $ -->
+ <body>
+ Statistical routines involving multivariate data.
+ </body>
+</html>