diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java')
-rw-r--r-- | src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java | 416 |
1 files changed, 416 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java new file mode 100644 index 0000000..98c58c8 --- /dev/null +++ b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math.stat.descriptive; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; + +/** + * <p> + * An aggregator for {@code SummaryStatistics} from several data sets or + * data set partitions. In its simplest usage mode, the client creates an + * instance via the zero-argument constructor, then uses + * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics} + * for each individual data set / partition. The per-set statistics objects + * are used as normal, and at any time the aggregate statistics for all the + * contributors can be obtained from this object. + * </p><p> + * Clients with specialized requirements can use alternative constructors to + * control the statistics implementations and initial values used by the + * contributing and the internal aggregate {@code SummaryStatistics} objects. + * </p><p> + * A static {@link #aggregate(Collection)} method is also included that computes + * aggregate statistics directly from a Collection of SummaryStatistics instances. + * </p><p> + * When {@link #createContributingStatistics()} is used to create SummaryStatistics + * instances to be aggregated concurrently, the created instances' + * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating + * instance maintained by this class. In multithreaded environments, if the functionality + * provided by {@link #aggregate(Collection)} is adequate, that method should be used + * to avoid unecessary computation and synchronization delays.</p> + * + * @since 2.0 + * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $ + * + */ +public class AggregateSummaryStatistics implements StatisticalSummary, + Serializable { + + + /** Serializable version identifier */ + private static final long serialVersionUID = -8207112444016386906L; + + /** + * A SummaryStatistics serving as a prototype for creating SummaryStatistics + * contributing to this aggregate + */ + private final SummaryStatistics statisticsPrototype; + + /** + * The SummaryStatistics in which aggregate statistics are accumulated. + */ + private final SummaryStatistics statistics; + + /** + * Initializes a new AggregateSummaryStatistics with default statistics + * implementations. + * + */ + public AggregateSummaryStatistics() { + this(new SummaryStatistics()); + } + + /** + * Initializes a new AggregateSummaryStatistics with the specified statistics + * object as a prototype for contributing statistics and for the internal + * aggregate statistics. This provides for customized statistics implementations + * to be used by contributing and aggregate statistics. + * + * @param prototypeStatistics a {@code SummaryStatistics} serving as a + * prototype both for the internal aggregate statistics and for + * contributing statistics obtained via the + * {@code createContributingStatistics()} method. Being a prototype + * means that other objects are initialized by copying this object's state. + * If {@code null}, a new, default statistics object is used. Any statistic + * values in the prototype are propagated to contributing statistics + * objects and (once) into these aggregate statistics. + * @see #createContributingStatistics() + */ + public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) { + this(prototypeStatistics, + prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics)); + } + + /** + * Initializes a new AggregateSummaryStatistics with the specified statistics + * object as a prototype for contributing statistics and for the internal + * aggregate statistics. This provides for different statistics implementations + * to be used by contributing and aggregate statistics and for an initial + * state to be supplied for the aggregate statistics. + * + * @param prototypeStatistics a {@code SummaryStatistics} serving as a + * prototype both for the internal aggregate statistics and for + * contributing statistics obtained via the + * {@code createContributingStatistics()} method. Being a prototype + * means that other objects are initialized by copying this object's state. + * If {@code null}, a new, default statistics object is used. Any statistic + * values in the prototype are propagated to contributing statistics + * objects, but not into these aggregate statistics. + * @param initialStatistics a {@code SummaryStatistics} to serve as the + * internal aggregate statistics object. If {@code null}, a new, default + * statistics object is used. + * @see #createContributingStatistics() + */ + public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics, + SummaryStatistics initialStatistics) { + this.statisticsPrototype = + (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics; + this.statistics = + (initialStatistics == null) ? new SummaryStatistics() : initialStatistics; + } + + /** + * {@inheritDoc}. This version returns the maximum over all the aggregated + * data. + * + * @see StatisticalSummary#getMax() + */ + public double getMax() { + synchronized (statistics) { + return statistics.getMax(); + } + } + + /** + * {@inheritDoc}. This version returns the mean of all the aggregated data. + * + * @see StatisticalSummary#getMean() + */ + public double getMean() { + synchronized (statistics) { + return statistics.getMean(); + } + } + + /** + * {@inheritDoc}. This version returns the minimum over all the aggregated + * data. + * + * @see StatisticalSummary#getMin() + */ + public double getMin() { + synchronized (statistics) { + return statistics.getMin(); + } + } + + /** + * {@inheritDoc}. This version returns a count of all the aggregated data. + * + * @see StatisticalSummary#getN() + */ + public long getN() { + synchronized (statistics) { + return statistics.getN(); + } + } + + /** + * {@inheritDoc}. This version returns the standard deviation of all the + * aggregated data. + * + * @see StatisticalSummary#getStandardDeviation() + */ + public double getStandardDeviation() { + synchronized (statistics) { + return statistics.getStandardDeviation(); + } + } + + /** + * {@inheritDoc}. This version returns a sum of all the aggregated data. + * + * @see StatisticalSummary#getSum() + */ + public double getSum() { + synchronized (statistics) { + return statistics.getSum(); + } + } + + /** + * {@inheritDoc}. This version returns the variance of all the aggregated + * data. + * + * @see StatisticalSummary#getVariance() + */ + public double getVariance() { + synchronized (statistics) { + return statistics.getVariance(); + } + } + + /** + * Returns the sum of the logs of all the aggregated data. + * + * @return the sum of logs + * @see SummaryStatistics#getSumOfLogs() + */ + public double getSumOfLogs() { + synchronized (statistics) { + return statistics.getSumOfLogs(); + } + } + + /** + * Returns the geometric mean of all the aggregated data. + * + * @return the geometric mean + * @see SummaryStatistics#getGeometricMean() + */ + public double getGeometricMean() { + synchronized (statistics) { + return statistics.getGeometricMean(); + } + } + + /** + * Returns the sum of the squares of all the aggregated data. + * + * @return The sum of squares + * @see SummaryStatistics#getSumsq() + */ + public double getSumsq() { + synchronized (statistics) { + return statistics.getSumsq(); + } + } + + /** + * Returns a statistic related to the Second Central Moment. Specifically, + * what is returned is the sum of squared deviations from the sample mean + * among the all of the aggregated data. + * + * @return second central moment statistic + * @see SummaryStatistics#getSecondMoment() + */ + public double getSecondMoment() { + synchronized (statistics) { + return statistics.getSecondMoment(); + } + } + + /** + * Return a {@link StatisticalSummaryValues} instance reporting current + * aggregate statistics. + * + * @return Current values of aggregate statistics + */ + public StatisticalSummary getSummary() { + synchronized (statistics) { + return new StatisticalSummaryValues(getMean(), getVariance(), getN(), + getMax(), getMin(), getSum()); + } + } + + /** + * Creates and returns a {@code SummaryStatistics} whose data will be + * aggregated with those of this {@code AggregateSummaryStatistics}. + * + * @return a {@code SummaryStatistics} whose data will be aggregated with + * those of this {@code AggregateSummaryStatistics}. The initial state + * is a copy of the configured prototype statistics. + */ + public SummaryStatistics createContributingStatistics() { + SummaryStatistics contributingStatistics + = new AggregatingSummaryStatistics(statistics); + + SummaryStatistics.copy(statisticsPrototype, contributingStatistics); + + return contributingStatistics; + } + + /** + * Computes aggregate summary statistics. This method can be used to combine statistics + * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned + * should contain the same values that would have been obtained by computing a single + * StatisticalSummary over the combined dataset. + * <p> + * Returns null if the collection is empty or null. + * </p> + * + * @param statistics collection of SummaryStatistics to aggregate + * @return summary statistics for the combined dataset + */ + public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) { + if (statistics == null) { + return null; + } + Iterator<SummaryStatistics> iterator = statistics.iterator(); + if (!iterator.hasNext()) { + return null; + } + SummaryStatistics current = iterator.next(); + long n = current.getN(); + double min = current.getMin(); + double sum = current.getSum(); + double max = current.getMax(); + double m2 = current.getSecondMoment(); + double mean = current.getMean(); + while (iterator.hasNext()) { + current = iterator.next(); + if (current.getMin() < min || Double.isNaN(min)) { + min = current.getMin(); + } + if (current.getMax() > max || Double.isNaN(max)) { + max = current.getMax(); + } + sum += current.getSum(); + final double oldN = n; + final double curN = current.getN(); + n += curN; + final double meanDiff = current.getMean() - mean; + mean = sum / n; + m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; + } + final double variance; + if (n == 0) { + variance = Double.NaN; + } else if (n == 1) { + variance = 0d; + } else { + variance = m2 / (n - 1); + } + return new StatisticalSummaryValues(mean, variance, n, max, min, sum); + } + + /** + * A SummaryStatistics that also forwards all values added to it to a second + * {@code SummaryStatistics} for aggregation. + * + * @since 2.0 + */ + private static class AggregatingSummaryStatistics extends SummaryStatistics { + + /** + * The serialization version of this class + */ + private static final long serialVersionUID = 1L; + + /** + * An additional SummaryStatistics into which values added to these + * statistics (and possibly others) are aggregated + */ + private final SummaryStatistics aggregateStatistics; + + /** + * Initializes a new AggregatingSummaryStatistics with the specified + * aggregate statistics object + * + * @param aggregateStatistics a {@code SummaryStatistics} into which + * values added to this statistics object should be aggregated + */ + public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) { + this.aggregateStatistics = aggregateStatistics; + } + + /** + * {@inheritDoc}. This version adds the provided value to the configured + * aggregate after adding it to these statistics. + * + * @see SummaryStatistics#addValue(double) + */ + @Override + public void addValue(double value) { + super.addValue(value); + synchronized (aggregateStatistics) { + aggregateStatistics.addValue(value); + } + } + + /** + * Returns true iff <code>object</code> is a + * <code>SummaryStatistics</code> instance and all statistics have the + * same values as this. + * @param object the object to test equality against. + * @return true if object equals this + */ + @Override + public boolean equals(Object object) { + if (object == this) { + return true; + } + if (object instanceof AggregatingSummaryStatistics == false) { + return false; + } + AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object; + return super.equals(stat) && + aggregateStatistics.equals(stat.aggregateStatistics); + } + + /** + * Returns hash code based on values of statistics + * @return hash code + */ + @Override + public int hashCode() { + return 123 + super.hashCode() + aggregateStatistics.hashCode(); + } + } +} |