summaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java')
-rw-r--r--src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java416
1 files changed, 416 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
new file mode 100644
index 0000000..98c58c8
--- /dev/null
+++ b/src/main/java/org/apache/commons/math/stat/descriptive/AggregateSummaryStatistics.java
@@ -0,0 +1,416 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math.stat.descriptive;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * <p>
+ * An aggregator for {@code SummaryStatistics} from several data sets or
+ * data set partitions. In its simplest usage mode, the client creates an
+ * instance via the zero-argument constructor, then uses
+ * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
+ * for each individual data set / partition. The per-set statistics objects
+ * are used as normal, and at any time the aggregate statistics for all the
+ * contributors can be obtained from this object.
+ * </p><p>
+ * Clients with specialized requirements can use alternative constructors to
+ * control the statistics implementations and initial values used by the
+ * contributing and the internal aggregate {@code SummaryStatistics} objects.
+ * </p><p>
+ * A static {@link #aggregate(Collection)} method is also included that computes
+ * aggregate statistics directly from a Collection of SummaryStatistics instances.
+ * </p><p>
+ * When {@link #createContributingStatistics()} is used to create SummaryStatistics
+ * instances to be aggregated concurrently, the created instances'
+ * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
+ * instance maintained by this class. In multithreaded environments, if the functionality
+ * provided by {@link #aggregate(Collection)} is adequate, that method should be used
+ * to avoid unnecessary computation and synchronization delays.</p>
+ *
+ * @since 2.0
+ * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $
+ *
+ */
+public class AggregateSummaryStatistics implements StatisticalSummary,
+ Serializable {
+
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -8207112444016386906L;
+
+ /**
+ * A SummaryStatistics serving as a prototype for creating SummaryStatistics
+ * contributing to this aggregate. Its state is copied into every object
+ * returned by {@code createContributingStatistics()}; the reference itself
+ * is never handed out by this class.
+ */
+ private final SummaryStatistics statisticsPrototype;
+
+ /**
+ * The SummaryStatistics in which aggregate statistics are accumulated.
+ * This object also serves as the lock: the accessors of this class and the
+ * {@code addValue} method of the contributing statistics synchronize on it
+ * before reading from or writing to it.
+ */
+ private final SummaryStatistics statistics;
+
+ /**
+ * Initializes a new AggregateSummaryStatistics with default statistics
+ * implementations.
+ * <p>
+ * Delegates to the single-argument constructor, passing a newly created
+ * {@code SummaryStatistics} as the prototype.
+ * </p>
+ */
+ public AggregateSummaryStatistics() {
+ this(new SummaryStatistics());
+ }
+
+ /**
+ * Initializes a new AggregateSummaryStatistics with the specified statistics
+ * object as a prototype for contributing statistics and for the internal
+ * aggregate statistics. This provides for customized statistics implementations
+ * to be used by contributing and aggregate statistics.
+ * <p>
+ * The internal aggregate is created with
+ * {@code new SummaryStatistics(prototypeStatistics)}, so the prototype and
+ * the aggregate are two distinct instances. When the argument is
+ * {@code null}, {@code null} is forwarded for both roles and the
+ * two-argument constructor substitutes default objects.
+ * </p>
+ *
+ * @param prototypeStatistics a {@code SummaryStatistics} serving as a
+ * prototype both for the internal aggregate statistics and for
+ * contributing statistics obtained via the
+ * {@code createContributingStatistics()} method. Being a prototype
+ * means that other objects are initialized by copying this object's state.
+ * If {@code null}, a new, default statistics object is used. Any statistic
+ * values in the prototype are propagated to contributing statistics
+ * objects and (once) into these aggregate statistics.
+ * @see #createContributingStatistics()
+ */
+ public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
+ this(prototypeStatistics,
+ prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
+ }
+
+    /**
+     * Initializes a new AggregateSummaryStatistics from two independent
+     * objects: one used as the prototype for contributing statistics and one
+     * used as the internal aggregate. This allows different statistics
+     * implementations for contributors and for the aggregate, and lets the
+     * caller supply an initial state for the aggregate.
+     * <p>
+     * Both arguments are stored by reference; neither is copied here.
+     * </p>
+     *
+     * @param prototypeStatistics a {@code SummaryStatistics} whose state is
+     *        copied into each object returned by
+     *        {@code createContributingStatistics()}. If {@code null}, a new,
+     *        default statistics object is used. Any statistic values in the
+     *        prototype are propagated to contributing statistics objects,
+     *        but not into these aggregate statistics.
+     * @param initialStatistics a {@code SummaryStatistics} to serve as the
+     *        internal aggregate statistics object. If {@code null}, a new,
+     *        default statistics object is used.
+     * @see #createContributingStatistics()
+     */
+    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
+                                      SummaryStatistics initialStatistics) {
+        if (prototypeStatistics != null) {
+            this.statisticsPrototype = prototypeStatistics;
+        } else {
+            this.statisticsPrototype = new SummaryStatistics();
+        }
+
+        if (initialStatistics != null) {
+            this.statistics = initialStatistics;
+        } else {
+            this.statistics = new SummaryStatistics();
+        }
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the largest value seen across all
+     * of the aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getMax()
+     */
+    public double getMax() {
+        final double max;
+        synchronized (statistics) {
+            max = statistics.getMax();
+        }
+        return max;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the mean over all of the
+     * aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getMean()
+     */
+    public double getMean() {
+        final double mean;
+        synchronized (statistics) {
+            mean = statistics.getMean();
+        }
+        return mean;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the smallest value seen across all
+     * of the aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getMin()
+     */
+    public double getMin() {
+        final double min;
+        synchronized (statistics) {
+            min = statistics.getMin();
+        }
+        return min;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the number of values in all of the
+     * aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getN()
+     */
+    public long getN() {
+        final long count;
+        synchronized (statistics) {
+            count = statistics.getN();
+        }
+        return count;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the standard deviation of all of
+     * the aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getStandardDeviation()
+     */
+    public double getStandardDeviation() {
+        final double standardDeviation;
+        synchronized (statistics) {
+            standardDeviation = statistics.getStandardDeviation();
+        }
+        return standardDeviation;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the sum of all of the aggregated
+     * data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getSum()
+     */
+    public double getSum() {
+        final double sum;
+        synchronized (statistics) {
+            sum = statistics.getSum();
+        }
+        return sum;
+    }
+
+    /**
+     * {@inheritDoc}. The value reported is the variance of all of the
+     * aggregated data, read while holding the aggregate's lock.
+     *
+     * @see StatisticalSummary#getVariance()
+     */
+    public double getVariance() {
+        final double variance;
+        synchronized (statistics) {
+            variance = statistics.getVariance();
+        }
+        return variance;
+    }
+
+    /**
+     * Returns the sum of the logs of all the aggregated data. The value is
+     * read from the internal aggregate while holding its lock.
+     *
+     * @return the sum of logs
+     * @see SummaryStatistics#getSumOfLogs()
+     */
+    public double getSumOfLogs() {
+        final double sumOfLogs;
+        synchronized (statistics) {
+            sumOfLogs = statistics.getSumOfLogs();
+        }
+        return sumOfLogs;
+    }
+
+    /**
+     * Returns the geometric mean of all the aggregated data. The value is
+     * read from the internal aggregate while holding its lock.
+     *
+     * @return the geometric mean
+     * @see SummaryStatistics#getGeometricMean()
+     */
+    public double getGeometricMean() {
+        final double geometricMean;
+        synchronized (statistics) {
+            geometricMean = statistics.getGeometricMean();
+        }
+        return geometricMean;
+    }
+
+    /**
+     * Returns the sum of the squares of all the aggregated data. The value
+     * is read from the internal aggregate while holding its lock.
+     *
+     * @return the sum of squares
+     * @see SummaryStatistics#getSumsq()
+     */
+    public double getSumsq() {
+        final double sumOfSquares;
+        synchronized (statistics) {
+            sumOfSquares = statistics.getSumsq();
+        }
+        return sumOfSquares;
+    }
+
+    /**
+     * Returns a statistic related to the Second Central Moment. Specifically,
+     * what is returned is the sum of squared deviations from the sample mean
+     * among all of the aggregated data. The value is read from the internal
+     * aggregate while holding its lock.
+     *
+     * @return second central moment statistic
+     * @see SummaryStatistics#getSecondMoment()
+     */
+    public double getSecondMoment() {
+        final double secondMoment;
+        synchronized (statistics) {
+            secondMoment = statistics.getSecondMoment();
+        }
+        return secondMoment;
+    }
+
+    /**
+     * Returns a {@link StatisticalSummaryValues} snapshot of the current
+     * aggregate statistics.
+     * <p>
+     * The aggregate's lock is held while all six values are read, so the
+     * snapshot is taken without contributors adding values in between. The
+     * values are obtained through this object's own accessors.
+     * </p>
+     *
+     * @return Current values of aggregate statistics
+     */
+    public StatisticalSummary getSummary() {
+        synchronized (statistics) {
+            final double mean = getMean();
+            final double variance = getVariance();
+            final long count = getN();
+            final double max = getMax();
+            final double min = getMin();
+            final double sum = getSum();
+            return new StatisticalSummaryValues(mean, variance, count, max, min, sum);
+        }
+    }
+
+    /**
+     * Creates a new {@code SummaryStatistics} that feeds this
+     * {@code AggregateSummaryStatistics}: every value added to the returned
+     * object is also added to the internal aggregate.
+     *
+     * @return a {@code SummaryStatistics} whose data will be aggregated with
+     * those of this {@code AggregateSummaryStatistics}. The initial state
+     * is a copy of the configured prototype statistics.
+     */
+    public SummaryStatistics createContributingStatistics() {
+        final SummaryStatistics contributor = new AggregatingSummaryStatistics(statistics);
+        // Start the contributor from the prototype's state.
+        SummaryStatistics.copy(statisticsPrototype, contributor);
+        return contributor;
+    }
+
+    /**
+     * Computes aggregate summary statistics. This method can be used to combine statistics
+     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
+     * should contain the same values that would have been obtained by computing a single
+     * StatisticalSummary over the combined dataset.
+     * <p>
+     * Returns null if the collection is empty or null.
+     * </p>
+     * <p>
+     * Contributors whose {@code getN()} is zero hold no data and are skipped, so an
+     * empty partition does not affect the result. If every contributor is empty, the
+     * returned summary reports {@code n == 0} and a variance of {@code Double.NaN};
+     * the remaining values are those reported by the last (empty) contributor.
+     * </p>
+     * <p>
+     * The second moments are merged pairwise: for two partitions with counts
+     * {@code nA}, {@code nB} and means {@code meanA}, {@code meanB}, the combined sum
+     * of squared deviations is
+     * {@code m2A + m2B + (meanB - meanA)^2 * nA * nB / (nA + nB)}.
+     * </p>
+     *
+     * @param statistics collection of SummaryStatistics to aggregate
+     * @return summary statistics for the combined dataset, or {@code null} if
+     * {@code statistics} is {@code null} or empty
+     */
+    public static StatisticalSummaryValues aggregate(Collection<? extends SummaryStatistics> statistics) {
+        if (statistics == null) {
+            return null;
+        }
+        final Iterator<? extends SummaryStatistics> iterator = statistics.iterator();
+        if (!iterator.hasNext()) {
+            return null;
+        }
+
+        // Seed the running values from the first contributor that actually holds
+        // data. Seeding from an empty one would start the merge from its moments
+        // (presumably NaN when no data has been added), which would then
+        // propagate into every later step.
+        SummaryStatistics current = iterator.next();
+        while (current.getN() == 0 && iterator.hasNext()) {
+            current = iterator.next();
+        }
+        long n = current.getN();
+        double min = current.getMin();
+        double sum = current.getSum();
+        double max = current.getMax();
+        double m2 = current.getSecondMoment();
+        double mean = current.getMean();
+
+        while (iterator.hasNext()) {
+            current = iterator.next();
+            final long curN = current.getN();
+            if (curN == 0) {
+                // Nothing to merge; also avoids 0/0 in the correction term below.
+                continue;
+            }
+            if (current.getMin() < min || Double.isNaN(min)) {
+                min = current.getMin();
+            }
+            if (current.getMax() > max || Double.isNaN(max)) {
+                max = current.getMax();
+            }
+            sum += current.getSum();
+            // Count before this contributor, as a double for the floating-point
+            // correction term; the running count itself stays an exact long.
+            final double oldN = n;
+            n += curN;
+            final double meanDiff = current.getMean() - mean;
+            mean = sum / n;
+            m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
+        }
+
+        final double variance;
+        if (n == 0) {
+            variance = Double.NaN;
+        } else if (n == 1) {
+            variance = 0d;
+        } else {
+            variance = m2 / (n - 1);
+        }
+        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
+    }
+
+ /**
+ * A SummaryStatistics that also forwards all values added to it to a second
+ * {@code SummaryStatistics} for aggregation.
+ *
+ * @since 2.0
+ */
+ private static class AggregatingSummaryStatistics extends SummaryStatistics {
+
+ /**
+ * The serialization version of this class
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * An additional SummaryStatistics into which values added to these
+ * statistics (and possibly others) are aggregated
+ */
+ private final SummaryStatistics aggregateStatistics;
+
+ /**
+ * Initializes a new AggregatingSummaryStatistics with the specified
+ * aggregate statistics object
+ *
+ * @param aggregateStatistics a {@code SummaryStatistics} into which
+ * values added to this statistics object should be aggregated
+ */
+ public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
+ this.aggregateStatistics = aggregateStatistics;
+ }
+
+ /**
+ * {@inheritDoc}. This version adds the provided value to the configured
+ * aggregate after adding it to these statistics.
+ *
+ * @see SummaryStatistics#addValue(double)
+ */
+ @Override
+ public void addValue(double value) {
+ super.addValue(value);
+ synchronized (aggregateStatistics) {
+ aggregateStatistics.addValue(value);
+ }
+ }
+
+ /**
+ * Returns true iff <code>object</code> is a
+ * <code>SummaryStatistics</code> instance and all statistics have the
+ * same values as this.
+ * @param object the object to test equality against.
+ * @return true if object equals this
+ */
+ @Override
+ public boolean equals(Object object) {
+ if (object == this) {
+ return true;
+ }
+ if (object instanceof AggregatingSummaryStatistics == false) {
+ return false;
+ }
+ AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
+ return super.equals(stat) &&
+ aggregateStatistics.equals(stat.aggregateStatistics);
+ }
+
+ /**
+ * Returns hash code based on values of statistics
+ * @return hash code
+ */
+ @Override
+ public int hashCode() {
+ return 123 + super.hashCode() + aggregateStatistics.hashCode();
+ }
+ }
+}