diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math3/stat/descriptive')
41 files changed, 10963 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractStorelessUnivariateStatistic.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractStorelessUnivariateStatistic.java new file mode 100644 index 0000000..4249994 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractStorelessUnivariateStatistic.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.Precision; + +/** + * + * Abstract implementation of the {@link StorelessUnivariateStatistic} interface. 
+ * <p> + * Provides default <code>evaluate()</code> and <code>incrementAll(double[])</code> + * implementations.</p> + * <p> + * <strong>Note that these implementations are not synchronized.</strong></p> + * + */ +public abstract class AbstractStorelessUnivariateStatistic + extends AbstractUnivariateStatistic + implements StorelessUnivariateStatistic { + + /** + * This default implementation calls {@link #clear}, then invokes + * {@link #increment} in a loop over the input array, and then uses + * {@link #getResult} to compute the return value. + * <p> + * Note that this implementation changes the internal state of the + * statistic. Its side effects are the same as invoking {@link #clear} and + * then {@link #incrementAll(double[])}.</p> + * <p> + * Implementations may override this method with a more efficient and + * possibly more accurate implementation that works directly with the + * input array.</p> + * <p> + * If the array is null, a MathIllegalArgumentException is thrown.</p> + * @param values input array + * @return the value of the statistic applied to the input array + * @throws MathIllegalArgumentException if values is null + * @see org.apache.commons.math3.stat.descriptive.UnivariateStatistic#evaluate(double[]) + */ + @Override + public double evaluate(final double[] values) throws MathIllegalArgumentException { + if (values == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + return evaluate(values, 0, values.length); + } + + /** + * This default implementation calls {@link #clear}, then invokes + * {@link #increment} in a loop over the specified portion of the input + * array, and then uses {@link #getResult} to compute the return value. + * <p> + * Note that this implementation changes the internal state of the + * statistic. 
Its side effects are the same as invoking {@link #clear} and + * then {@link #incrementAll(double[], int, int)}.</p> + * <p> + * Implementations may override this method with a more efficient and + * possibly more accurate implementation that works directly with the + * input array.</p> + * <p> + * If the array is null or the index parameters are not valid, an + * MathIllegalArgumentException is thrown.</p> + * @param values the input array + * @param begin the index of the first element to include + * @param length the number of elements to include + * @return the value of the statistic applied to the included array entries + * @throws MathIllegalArgumentException if the array is null or the indices are not valid + * @see org.apache.commons.math3.stat.descriptive.UnivariateStatistic#evaluate(double[], int, int) + */ + @Override + public double evaluate(final double[] values, final int begin, + final int length) throws MathIllegalArgumentException { + if (test(values, begin, length)) { + clear(); + incrementAll(values, begin, length); + } + return getResult(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract StorelessUnivariateStatistic copy(); + + /** + * {@inheritDoc} + */ + public abstract void clear(); + + /** + * {@inheritDoc} + */ + public abstract double getResult(); + + /** + * {@inheritDoc} + */ + public abstract void increment(final double d); + + /** + * This default implementation just calls {@link #increment} in a loop over + * the input array. 
+ * <p> + * Throws IllegalArgumentException if the input values array is null.</p> + * + * @param values values to add + * @throws MathIllegalArgumentException if values is null + * @see org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic#incrementAll(double[]) + */ + public void incrementAll(double[] values) throws MathIllegalArgumentException { + if (values == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + incrementAll(values, 0, values.length); + } + + /** + * This default implementation just calls {@link #increment} in a loop over + * the specified portion of the input array. + * <p> + * Throws IllegalArgumentException if the input values array is null.</p> + * + * @param values array holding values to add + * @param begin index of the first array element to add + * @param length number of array elements to add + * @throws MathIllegalArgumentException if values is null + * @see org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic#incrementAll(double[], int, int) + */ + public void incrementAll(double[] values, int begin, int length) throws MathIllegalArgumentException { + if (test(values, begin, length)) { + int k = begin + length; + for (int i = begin; i < k; i++) { + increment(values[i]); + } + } + } + + /** + * Returns true iff <code>object</code> is an + * <code>AbstractStorelessUnivariateStatistic</code> returning the same + * values as this for <code>getResult()</code> and <code>getN()</code> + * @param object object to test equality against. 
+ * @return true if object returns the same value as this + */ + @Override + public boolean equals(Object object) { + if (object == this ) { + return true; + } + if (object instanceof AbstractStorelessUnivariateStatistic == false) { + return false; + } + AbstractStorelessUnivariateStatistic stat = (AbstractStorelessUnivariateStatistic) object; + return Precision.equalsIncludingNaN(stat.getResult(), this.getResult()) && + Precision.equalsIncludingNaN(stat.getN(), this.getN()); + } + + /** + * Returns hash code based on getResult() and getN() + * + * @return hash code + */ + @Override + public int hashCode() { + return 31* (31 + MathUtils.hash(getResult())) + MathUtils.hash(getN()); + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractUnivariateStatistic.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractUnivariateStatistic.java new file mode 100644 index 0000000..9abe45a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AbstractUnivariateStatistic.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.util.MathArrays; + +/** + * Abstract base class for all implementations of the + * {@link UnivariateStatistic} interface. + * <p> + * Provides a default implementation of <code>evaluate(double[]),</code> + * delegating to <code>evaluate(double[], int, int)</code> in the natural way. + * </p> + * <p> + * Also includes a <code>test</code> method that performs generic parameter + * validation for the <code>evaluate</code> methods.</p> + * + */ +public abstract class AbstractUnivariateStatistic + implements UnivariateStatistic { + + /** Stored data. */ + private double[] storedData; + + /** + * Set the data array. + * <p> + * The stored value is a copy of the parameter array, not the array itself. + * </p> + * @param values data array to store (may be null to remove stored data) + * @see #evaluate() + */ + public void setData(final double[] values) { + storedData = (values == null) ? null : values.clone(); + } + + /** + * Get a copy of the stored data array. + * @return copy of the stored data array (may be null) + */ + public double[] getData() { + return (storedData == null) ? null : storedData.clone(); + } + + /** + * Get a reference to the stored data array. + * @return reference to the stored data array (may be null) + */ + protected double[] getDataRef() { + return storedData; + } + + /** + * Set the data array. The input array is copied, not referenced. 
+ * + * @param values data array to store + * @param begin the index of the first element to include + * @param length the number of elements to include + * @throws MathIllegalArgumentException if values is null or the indices + * are not valid + * @see #evaluate() + */ + public void setData(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + if (values == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + + if (begin < 0) { + throw new NotPositiveException(LocalizedFormats.START_POSITION, begin); + } + + if (length < 0) { + throw new NotPositiveException(LocalizedFormats.LENGTH, length); + } + + if (begin + length > values.length) { + throw new NumberIsTooLargeException(LocalizedFormats.SUBARRAY_ENDS_AFTER_ARRAY_END, + begin + length, values.length, true); + } + storedData = new double[length]; + System.arraycopy(values, begin, storedData, 0, length); + } + + /** + * Returns the result of evaluating the statistic over the stored data. + * <p> + * The stored array is the one which was set by previous calls to {@link #setData(double[])}. + * </p> + * @return the value of the statistic applied to the stored data + * @throws MathIllegalArgumentException if the stored data array is null + */ + public double evaluate() throws MathIllegalArgumentException { + return evaluate(storedData); + } + + /** + * {@inheritDoc} + */ + public double evaluate(final double[] values) throws MathIllegalArgumentException { + test(values, 0, 0); + return evaluate(values, 0, values.length); + } + + /** + * {@inheritDoc} + */ + public abstract double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException; + + /** + * {@inheritDoc} + */ + public abstract UnivariateStatistic copy(); + + /** + * This method is used by <code>evaluate(double[], int, int)</code> methods + * to verify that the input parameters designate a subarray of positive length. 
+ * <p> + * <ul> + * <li>returns <code>true</code> iff the parameters designate a subarray of + * positive length</li> + * <li>throws <code>MathIllegalArgumentException</code> if the array is null or + * the indices are invalid</li> + * <li>returns <code>false</code> if the array is non-null, but + * <code>length</code> is 0.</li> + * </ul></p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return true if the parameters are valid and designate a subarray of positive length + * @throws MathIllegalArgumentException if the indices are invalid or the array is null + */ + protected boolean test( + final double[] values, + final int begin, + final int length) throws MathIllegalArgumentException { + return MathArrays.verifyValues(values, begin, length, false); + } + + /** + * This method is used by <code>evaluate(double[], int, int)</code> methods + * to verify that the input parameters designate a subarray of positive length. 
+ * <p> + * <ul> + * <li>returns <code>true</code> iff the parameters designate a subarray of + * non-negative length</li> + * <li>throws <code>MathIllegalArgumentException</code> if the array is null or + * the indices are invalid</li> + * <li>returns <code>false</code> if the array is non-null, but + * <code>length</code> is 0 unless <code>allowEmpty</code> is <code>true</code></li> + * </ul></p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @param allowEmpty if <code>true</code> then zero length arrays are allowed + * @return true if the parameters are valid + * @throws MathIllegalArgumentException if the indices are invalid or the array is null + * @since 3.0 + */ + protected boolean test(final double[] values, final int begin, + final int length, final boolean allowEmpty) throws MathIllegalArgumentException { + return MathArrays.verifyValues(values, begin, length, allowEmpty); + } + + /** + * This method is used by <code>evaluate(double[], double[], int, int)</code> methods + * to verify that the begin and length parameters designate a subarray of positive length + * and the weights are all non-negative, non-NaN, finite, and not all zero. 
+ * <p> + * <ul> + * <li>returns <code>true</code> iff the parameters designate a subarray of + * positive length and the weights array contains legitimate values.</li> + * <li>throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li></ul> + * </li> + * <li>returns <code>false</code> if the array is non-null, but + * <code>length</code> is 0.</li> + * </ul></p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return true if the parameters are valid and designate a subarray of positive length + * @throws MathIllegalArgumentException if the indices are invalid or the array is null + * @since 2.1 + */ + protected boolean test( + final double[] values, + final double[] weights, + final int begin, + final int length) throws MathIllegalArgumentException { + return MathArrays.verifyValues(values, weights, begin, length, false); + } + + /** + * This method is used by <code>evaluate(double[], double[], int, int)</code> methods + * to verify that the begin and length parameters designate a subarray of positive length + * and the weights are all non-negative, non-NaN, finite, and not all zero. 
+ * <p> + * <ul> + * <li>returns <code>true</code> iff the parameters designate a subarray of + * non-negative length and the weights array contains legitimate values.</li> + * <li>throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li></ul> + * </li> + * <li>returns <code>false</code> if the array is non-null, but + * <code>length</code> is 0 unless <code>allowEmpty</code> is <code>true</code>.</li> + * </ul></p> + * + * @param values the input array. + * @param weights the weights array. + * @param begin index of the first array element to include. + * @param length the number of elements to include. + * @param allowEmpty if {@code true} then allow zero length arrays to pass. + * @return {@code true} if the parameters are valid. + * @throws NullArgumentException if either of the arrays is null + * @throws MathIllegalArgumentException if the array indices are not valid, + * the weights array contains NaN, infinite or negative elements, or there + * are no positive weights. 
+ * @since 3.0 + */ + protected boolean test(final double[] values, final double[] weights, + final int begin, final int length, final boolean allowEmpty) throws MathIllegalArgumentException { + + return MathArrays.verifyValues(values, weights, begin, length, allowEmpty); + } +} + diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java new file mode 100644 index 0000000..6ab3c33 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java @@ -0,0 +1,422 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.stat.descriptive; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Iterator; + +import org.apache.commons.math3.exception.NullArgumentException; + +/** + * <p> + * An aggregator for {@code SummaryStatistics} from several data sets or + * data set partitions. 
In its simplest usage mode, the client creates an + * instance via the zero-argument constructor, then uses + * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics} + * for each individual data set / partition. The per-set statistics objects + * are used as normal, and at any time the aggregate statistics for all the + * contributors can be obtained from this object. + * </p><p> + * Clients with specialized requirements can use alternative constructors to + * control the statistics implementations and initial values used by the + * contributing and the internal aggregate {@code SummaryStatistics} objects. + * </p><p> + * A static {@link #aggregate(Collection)} method is also included that computes + * aggregate statistics directly from a Collection of SummaryStatistics instances. + * </p><p> + * When {@link #createContributingStatistics()} is used to create SummaryStatistics + * instances to be aggregated concurrently, the created instances' + * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating + * instance maintained by this class. In multithreaded environments, if the functionality + * provided by {@link #aggregate(Collection)} is adequate, that method should be used + * to avoid unnecessary computation and synchronization delays.</p> + * + * @since 2.0 + * + */ +public class AggregateSummaryStatistics implements StatisticalSummary, + Serializable { + + + /** Serializable version identifier */ + private static final long serialVersionUID = -8207112444016386906L; + + /** + * A SummaryStatistics serving as a prototype for creating SummaryStatistics + * contributing to this aggregate + */ + private final SummaryStatistics statisticsPrototype; + + /** + * The SummaryStatistics in which aggregate statistics are accumulated. + */ + private final SummaryStatistics statistics; + + /** + * Initializes a new AggregateSummaryStatistics with default statistics + * implementations. 
+ * + */ + public AggregateSummaryStatistics() { + // No try-catch or throws NAE because arg is guaranteed non-null + this(new SummaryStatistics()); + } + + /** + * Initializes a new AggregateSummaryStatistics with the specified statistics + * object as a prototype for contributing statistics and for the internal + * aggregate statistics. This provides for customized statistics implementations + * to be used by contributing and aggregate statistics. + * + * @param prototypeStatistics a {@code SummaryStatistics} serving as a + * prototype both for the internal aggregate statistics and for + * contributing statistics obtained via the + * {@code createContributingStatistics()} method. Being a prototype + * means that other objects are initialized by copying this object's state. + * If {@code null}, a new, default statistics object is used. Any statistic + * values in the prototype are propagated to contributing statistics + * objects and (once) into these aggregate statistics. + * @throws NullArgumentException declared in the signature; note that a + * {@code null} prototype does not trigger it here, because it is replaced + * by a default statistics object + * @see #createContributingStatistics() + */ + public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException { + this(prototypeStatistics, + prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics)); + } + + /** + * Initializes a new AggregateSummaryStatistics with the specified statistics + * object as a prototype for contributing statistics and for the internal + * aggregate statistics. This provides for different statistics implementations + * to be used by contributing and aggregate statistics and for an initial + * state to be supplied for the aggregate statistics. + * + * @param prototypeStatistics a {@code SummaryStatistics} serving as a + * prototype both for the internal aggregate statistics and for + * contributing statistics obtained via the + * {@code createContributingStatistics()} method. 
Being a prototype + * means that other objects are initialized by copying this object's state. + * If {@code null}, a new, default statistics object is used. Any statistic + * values in the prototype are propagated to contributing statistics + * objects, but not into these aggregate statistics. + * @param initialStatistics a {@code SummaryStatistics} to serve as the + * internal aggregate statistics object. If {@code null}, a new, default + * statistics object is used. + * @see #createContributingStatistics() + */ + public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics, + SummaryStatistics initialStatistics) { + this.statisticsPrototype = + (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics; + this.statistics = + (initialStatistics == null) ? new SummaryStatistics() : initialStatistics; + } + + /** + * {@inheritDoc}. This version returns the maximum over all the aggregated + * data. + * + * @see StatisticalSummary#getMax() + */ + public double getMax() { + synchronized (statistics) { + return statistics.getMax(); + } + } + + /** + * {@inheritDoc}. This version returns the mean of all the aggregated data. + * + * @see StatisticalSummary#getMean() + */ + public double getMean() { + synchronized (statistics) { + return statistics.getMean(); + } + } + + /** + * {@inheritDoc}. This version returns the minimum over all the aggregated + * data. + * + * @see StatisticalSummary#getMin() + */ + public double getMin() { + synchronized (statistics) { + return statistics.getMin(); + } + } + + /** + * {@inheritDoc}. This version returns a count of all the aggregated data. + * + * @see StatisticalSummary#getN() + */ + public long getN() { + synchronized (statistics) { + return statistics.getN(); + } + } + + /** + * {@inheritDoc}. This version returns the standard deviation of all the + * aggregated data. 
+ * + * @see StatisticalSummary#getStandardDeviation() + */ + public double getStandardDeviation() { + synchronized (statistics) { + return statistics.getStandardDeviation(); + } + } + + /** + * {@inheritDoc}. This version returns a sum of all the aggregated data. + * + * @see StatisticalSummary#getSum() + */ + public double getSum() { + synchronized (statistics) { + return statistics.getSum(); + } + } + + /** + * {@inheritDoc}. This version returns the variance of all the aggregated + * data. + * + * @see StatisticalSummary#getVariance() + */ + public double getVariance() { + synchronized (statistics) { + return statistics.getVariance(); + } + } + + /** + * Returns the sum of the logs of all the aggregated data. + * + * @return the sum of logs + * @see SummaryStatistics#getSumOfLogs() + */ + public double getSumOfLogs() { + synchronized (statistics) { + return statistics.getSumOfLogs(); + } + } + + /** + * Returns the geometric mean of all the aggregated data. + * + * @return the geometric mean + * @see SummaryStatistics#getGeometricMean() + */ + public double getGeometricMean() { + synchronized (statistics) { + return statistics.getGeometricMean(); + } + } + + /** + * Returns the sum of the squares of all the aggregated data. + * + * @return The sum of squares + * @see SummaryStatistics#getSumsq() + */ + public double getSumsq() { + synchronized (statistics) { + return statistics.getSumsq(); + } + } + + /** + * Returns a statistic related to the Second Central Moment. Specifically, + * what is returned is the sum of squared deviations from the sample mean + * among all of the aggregated data. + * + * @return second central moment statistic + * @see SummaryStatistics#getSecondMoment() + */ + public double getSecondMoment() { + synchronized (statistics) { + return statistics.getSecondMoment(); + } + } + + /** + * Return a {@link StatisticalSummaryValues} instance reporting current + * aggregate statistics. 
+ * + * @return Current values of aggregate statistics + */ + public StatisticalSummary getSummary() { + synchronized (statistics) { + return new StatisticalSummaryValues(getMean(), getVariance(), getN(), + getMax(), getMin(), getSum()); + } + } + + /** + * Creates and returns a {@code SummaryStatistics} whose data will be + * aggregated with those of this {@code AggregateSummaryStatistics}. + * + * @return a {@code SummaryStatistics} whose data will be aggregated with + * those of this {@code AggregateSummaryStatistics}. The initial state + * is a copy of the configured prototype statistics. + */ + public SummaryStatistics createContributingStatistics() { + SummaryStatistics contributingStatistics + = new AggregatingSummaryStatistics(statistics); + + // No try - catch or advertising NAE because neither argument will ever be null + SummaryStatistics.copy(statisticsPrototype, contributingStatistics); + + return contributingStatistics; + } + + /** + * Computes aggregate summary statistics. This method can be used to combine statistics + * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned + * should contain the same values that would have been obtained by computing a single + * StatisticalSummary over the combined dataset. + * <p> + * Returns null if the collection is empty or null. + * </p> + * + * @param statistics collection of SummaryStatistics to aggregate + * @return summary statistics for the combined dataset + */ + public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) { + if (statistics == null) { + return null; + } + Iterator<? 
extends StatisticalSummary> iterator = statistics.iterator(); + if (!iterator.hasNext()) { + return null; + } + StatisticalSummary current = iterator.next(); + long n = current.getN(); + double min = current.getMin(); + double sum = current.getSum(); + double max = current.getMax(); + double var = current.getVariance(); + double m2 = var * (n - 1d); + double mean = current.getMean(); + while (iterator.hasNext()) { + current = iterator.next(); + if (current.getMin() < min || Double.isNaN(min)) { + min = current.getMin(); + } + if (current.getMax() > max || Double.isNaN(max)) { + max = current.getMax(); + } + sum += current.getSum(); + final double oldN = n; + final double curN = current.getN(); + n += curN; + final double meanDiff = current.getMean() - mean; + mean = sum / n; + final double curM2 = current.getVariance() * (curN - 1d); + m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n; + } + final double variance; + if (n == 0) { + variance = Double.NaN; + } else if (n == 1) { + variance = 0d; + } else { + variance = m2 / (n - 1); + } + return new StatisticalSummaryValues(mean, variance, n, max, min, sum); + } + + /** + * A SummaryStatistics that also forwards all values added to it to a second + * {@code SummaryStatistics} for aggregation. 
+ * + * @since 2.0 + */ + private static class AggregatingSummaryStatistics extends SummaryStatistics { + + /** + * The serialization version of this class + */ + private static final long serialVersionUID = 1L; + + /** + * An additional SummaryStatistics into which values added to these + * statistics (and possibly others) are aggregated + */ + private final SummaryStatistics aggregateStatistics; + + /** + * Initializes a new AggregatingSummaryStatistics with the specified + * aggregate statistics object + * + * @param aggregateStatistics a {@code SummaryStatistics} into which + * values added to this statistics object should be aggregated + */ + AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) { + this.aggregateStatistics = aggregateStatistics; + } + + /** + * {@inheritDoc}. This version adds the provided value to the configured + * aggregate after adding it to these statistics. + * + * @see SummaryStatistics#addValue(double) + */ + @Override + public void addValue(double value) { + super.addValue(value); + synchronized (aggregateStatistics) { + aggregateStatistics.addValue(value); + } + } + + /** + * Returns true iff <code>object</code> is a + * <code>SummaryStatistics</code> instance and all statistics have the + * same values as this. + * @param object the object to test equality against. 
+ * @return true if object equals this + */ + @Override + public boolean equals(Object object) { + if (object == this) { + return true; + } + if (object instanceof AggregatingSummaryStatistics == false) { + return false; + } + AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object; + return super.equals(stat) && + aggregateStatistics.equals(stat.aggregateStatistics); + } + + /** + * Returns hash code based on values of statistics + * @return hash code + */ + @Override + public int hashCode() { + return 123 + super.hashCode() + aggregateStatistics.hashCode(); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/DescriptiveStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/DescriptiveStatistics.java new file mode 100644 index 0000000..b215bc8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/DescriptiveStatistics.java @@ -0,0 +1,777 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.Arrays; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.moment.GeometricMean; +import org.apache.commons.math3.stat.descriptive.moment.Kurtosis; +import org.apache.commons.math3.stat.descriptive.moment.Mean; +import org.apache.commons.math3.stat.descriptive.moment.Skewness; +import org.apache.commons.math3.stat.descriptive.moment.Variance; +import org.apache.commons.math3.stat.descriptive.rank.Max; +import org.apache.commons.math3.stat.descriptive.rank.Min; +import org.apache.commons.math3.stat.descriptive.rank.Percentile; +import org.apache.commons.math3.stat.descriptive.summary.Sum; +import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.ResizableDoubleArray; +import org.apache.commons.math3.util.FastMath; + + +/** + * Maintains a dataset of values of a single variable and computes descriptive + * statistics based on stored data. The {@link #getWindowSize() windowSize} + * property sets a limit on the number of values that can be stored in the + * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of + * the dataset. This value should be used with caution, as the backing store + * will grow without bound in this case. For very large datasets, + * {@link SummaryStatistics}, which does not store the dataset, should be used + * instead of this class. 
If <code>windowSize</code> is not INFINITE_WINDOW and + * more values are added than can be stored in the dataset, new values are + * added in a "rolling" manner, with new values replacing the "oldest" values + * in the dataset. + * + * <p>Note: this class is not threadsafe. Use + * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple + * threads is required.</p> + * + */ +public class DescriptiveStatistics implements StatisticalSummary, Serializable { + + /** + * Represents an infinite window size. When the {@link #getWindowSize()} + * returns this value, there is no limit to the number of data values + * that can be stored in the dataset. + */ + public static final int INFINITE_WINDOW = -1; + + /** Serialization UID */ + private static final long serialVersionUID = 4133067267405273064L; + + /** Name of the setQuantile method. */ + private static final String SET_QUANTILE_METHOD_NAME = "setQuantile"; + + /** hold the window size **/ + protected int windowSize = INFINITE_WINDOW; + + /** + * Stored data values + */ + private ResizableDoubleArray eDA = new ResizableDoubleArray(); + + /** Mean statistic implementation - can be reset by setter. */ + private UnivariateStatistic meanImpl = new Mean(); + + /** Geometric mean statistic implementation - can be reset by setter. */ + private UnivariateStatistic geometricMeanImpl = new GeometricMean(); + + /** Kurtosis statistic implementation - can be reset by setter. */ + private UnivariateStatistic kurtosisImpl = new Kurtosis(); + + /** Maximum statistic implementation - can be reset by setter. */ + private UnivariateStatistic maxImpl = new Max(); + + /** Minimum statistic implementation - can be reset by setter. */ + private UnivariateStatistic minImpl = new Min(); + + /** Percentile statistic implementation - can be reset by setter. */ + private UnivariateStatistic percentileImpl = new Percentile(); + + /** Skewness statistic implementation - can be reset by setter. 
*/ + private UnivariateStatistic skewnessImpl = new Skewness(); + + /** Variance statistic implementation - can be reset by setter. */ + private UnivariateStatistic varianceImpl = new Variance(); + + /** Sum of squares statistic implementation - can be reset by setter. */ + private UnivariateStatistic sumsqImpl = new SumOfSquares(); + + /** Sum statistic implementation - can be reset by setter. */ + private UnivariateStatistic sumImpl = new Sum(); + + /** + * Construct a DescriptiveStatistics instance with an infinite window + */ + public DescriptiveStatistics() { + } + + /** + * Construct a DescriptiveStatistics instance with the specified window + * + * @param window the window size. + * @throws MathIllegalArgumentException if window size is less than 1 but + * not equal to {@link #INFINITE_WINDOW} + */ + public DescriptiveStatistics(int window) throws MathIllegalArgumentException { + setWindowSize(window); + } + + /** + * Construct a DescriptiveStatistics instance with an infinite window + * and the initial data values in double[] initialDoubleArray. + * If initialDoubleArray is null, then this constructor corresponds to + * DescriptiveStatistics() + * + * @param initialDoubleArray the initial double[]. + */ + public DescriptiveStatistics(double[] initialDoubleArray) { + if (initialDoubleArray != null) { + eDA = new ResizableDoubleArray(initialDoubleArray); + } + } + + /** + * Copy constructor. Construct a new DescriptiveStatistics instance that + * is a copy of original. + * + * @param original DescriptiveStatistics instance to copy + * @throws NullArgumentException if original is null + */ + public DescriptiveStatistics(DescriptiveStatistics original) throws NullArgumentException { + copy(original, this); + } + + /** + * Adds the value to the dataset. 
If the dataset is at the maximum size + * (i.e., the number of stored elements equals the currently configured + * windowSize), the first (oldest) element in the dataset is discarded + * to make room for the new value. + * + * @param v the value to be added + */ + public void addValue(double v) { + if (windowSize != INFINITE_WINDOW) { + if (getN() == windowSize) { + eDA.addElementRolling(v); + } else if (getN() < windowSize) { + eDA.addElement(v); + } + } else { + eDA.addElement(v); + } + } + + /** + * Removes the most recent value from the dataset. + * + * @throws MathIllegalStateException if there are no elements stored + */ + public void removeMostRecentValue() throws MathIllegalStateException { + try { + eDA.discardMostRecentElements(1); + } catch (MathIllegalArgumentException ex) { + throw new MathIllegalStateException(LocalizedFormats.NO_DATA); + } + } + + /** + * Replaces the most recently stored value with the given value. + * There must be at least one element stored to call this method. + * + * @param v the value to replace the most recent stored value + * @return replaced value + * @throws MathIllegalStateException if there are no elements stored + */ + public double replaceMostRecentValue(double v) throws MathIllegalStateException { + return eDA.substituteMostRecentElement(v); + } + + /** + * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> + * arithmetic mean </a> of the available values + * @return The mean or Double.NaN if no values have been added. + */ + public double getMean() { + return apply(meanImpl); + } + + /** + * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> + * geometric mean </a> of the available values. + * <p> + * See {@link GeometricMean} for details on the computing algorithm.</p> + * + * @return The geometricMean, Double.NaN if no values have been added, + * or if any negative values have been added. 
+ */ + public double getGeometricMean() { + return apply(geometricMeanImpl); + } + + /** + * Returns the (sample) variance of the available values. + * + * <p>This method returns the bias-corrected sample variance (using {@code n - 1} in + * the denominator). Use {@link #getPopulationVariance()} for the non-bias-corrected + * population variance.</p> + * + * @return The variance, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public double getVariance() { + return apply(varianceImpl); + } + + /** + * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> + * population variance</a> of the available values. + * + * @return The population variance, Double.NaN if no values have been added, + * or 0.0 for a single value set. + */ + public double getPopulationVariance() { + return apply(new Variance(false)); + } + + /** + * Returns the standard deviation of the available values. + * @return The standard deviation, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + public double getStandardDeviation() { + double stdDev = Double.NaN; + if (getN() > 0) { + if (getN() > 1) { + stdDev = FastMath.sqrt(getVariance()); + } else { + stdDev = 0.0; + } + } + return stdDev; + } + + /** + * Returns the quadratic mean, a.k.a. + * <a href="http://mathworld.wolfram.com/Root-Mean-Square.html"> + * root-mean-square</a> of the available values + * @return The quadratic mean or {@code Double.NaN} if no values + * have been added. + */ + public double getQuadraticMean() { + final long n = getN(); + return n > 0 ? FastMath.sqrt(getSumsq() / n) : Double.NaN; + } + + /** + * Returns the skewness of the available values. Skewness is a + * measure of the asymmetry of a given distribution. + * + * @return The skewness, Double.NaN if less than 3 values have been added. + */ + public double getSkewness() { + return apply(skewnessImpl); + } + + /** + * Returns the Kurtosis of the available values. 
Kurtosis is a + * measure of the "peakedness" of a distribution. + * + * @return The kurtosis, Double.NaN if less than 4 values have been added. + */ + public double getKurtosis() { + return apply(kurtosisImpl); + } + + /** + * Returns the maximum of the available values + * @return The max or Double.NaN if no values have been added. + */ + public double getMax() { + return apply(maxImpl); + } + + /** + * Returns the minimum of the available values + * @return The min or Double.NaN if no values have been added. + */ + public double getMin() { + return apply(minImpl); + } + + /** + * Returns the number of available values + * @return The number of available values + */ + public long getN() { + return eDA.getNumElements(); + } + + /** + * Returns the sum of the values that have been added to Univariate. + * @return The sum or Double.NaN if no values have been added + */ + public double getSum() { + return apply(sumImpl); + } + + /** + * Returns the sum of the squares of the available values. + * @return The sum of the squares or Double.NaN if no + * values have been added. + */ + public double getSumsq() { + return apply(sumsqImpl); + } + + /** + * Resets all statistics and storage + */ + public void clear() { + eDA.clear(); + } + + + /** + * Returns the maximum number of values that can be stored in the + * dataset, or INFINITE_WINDOW (-1) if there is no limit. + * + * @return The current window size or -1 if its Infinite. + */ + public int getWindowSize() { + return windowSize; + } + + /** + * WindowSize controls the number of values that contribute to the + * reported statistics. For example, if windowSize is set to 3 and the + * values {1,2,3,4,5} have been added <strong> in that order</strong> then + * the <i>available values</i> are {3,4,5} and all reported statistics will + * be based on these values. 
If {@code windowSize} is decreased as a result + * of this call and there are more than the new value of elements in the + * current dataset, values from the front of the array are discarded to + * reduce the dataset to {@code windowSize} elements. + * + * @param windowSize sets the size of the window. + * @throws MathIllegalArgumentException if window size is less than 1 but + * not equal to {@link #INFINITE_WINDOW} + */ + public void setWindowSize(int windowSize) throws MathIllegalArgumentException { + if (windowSize < 1 && windowSize != INFINITE_WINDOW) { + throw new MathIllegalArgumentException( + LocalizedFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize); + } + + this.windowSize = windowSize; + + // We need to check to see if we need to discard elements + // from the front of the array. If the windowSize is less than + // the current number of elements. + if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) { + eDA.discardFrontElements(eDA.getNumElements() - windowSize); + } + } + + /** + * Returns the current set of values in an array of double primitives. + * The order of addition is preserved. The returned array is a fresh + * copy of the underlying data -- i.e., it is not a reference to the + * stored data. + * + * @return returns the current set of numbers in the order in which they + * were added to this set + */ + public double[] getValues() { + return eDA.getElements(); + } + + /** + * Returns the current set of values in an array of double primitives, + * sorted in ascending order. The returned array is a fresh + * copy of the underlying data -- i.e., it is not a reference to the + * stored data. 
+ * @return returns the current set of + * numbers sorted in ascending order + */ + public double[] getSortedValues() { + double[] sort = getValues(); + Arrays.sort(sort); + return sort; + } + + /** + * Returns the element at the specified index + * @param index The Index of the element + * @return return the element at the specified index + */ + public double getElement(int index) { + return eDA.getElement(index); + } + + /** + * Returns an estimate for the pth percentile of the stored values. + * <p> + * The implementation provided here follows the first estimation procedure presented + * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> + * </p><p> + * <strong>Preconditions</strong>:<ul> + * <li><code>0 < p ≤ 100</code> (otherwise an + * <code>MathIllegalArgumentException</code> is thrown)</li> + * <li>at least one value must be stored (returns <code>Double.NaN + * </code> otherwise)</li> + * </ul></p> + * + * @param p the requested percentile (scaled from 0 - 100) + * @return An estimate for the pth percentile of the stored data + * @throws MathIllegalStateException if percentile implementation has been + * overridden and the supplied implementation does not support setQuantile + * @throws MathIllegalArgumentException if p is not a valid quantile + */ + public double getPercentile(double p) throws MathIllegalStateException, MathIllegalArgumentException { + if (percentileImpl instanceof Percentile) { + ((Percentile) percentileImpl).setQuantile(p); + } else { + try { + percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, + new Class[] {Double.TYPE}).invoke(percentileImpl, + new Object[] {Double.valueOf(p)}); + } catch (NoSuchMethodException e1) { // Setter guard should prevent + throw new MathIllegalStateException( + LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, + percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); + } catch (IllegalAccessException e2) { + throw new MathIllegalStateException( 
+ LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, + SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); + } catch (InvocationTargetException e3) { + throw new IllegalStateException(e3.getCause()); + } + } + return apply(percentileImpl); + } + + /** + * Generates a text report displaying univariate statistics from values + * that have been added. Each statistic is displayed on a separate + * line. + * + * @return String with line feeds displaying statistics + */ + @Override + public String toString() { + StringBuilder outBuffer = new StringBuilder(); + String endl = "\n"; + outBuffer.append("DescriptiveStatistics:").append(endl); + outBuffer.append("n: ").append(getN()).append(endl); + outBuffer.append("min: ").append(getMin()).append(endl); + outBuffer.append("max: ").append(getMax()).append(endl); + outBuffer.append("mean: ").append(getMean()).append(endl); + outBuffer.append("std dev: ").append(getStandardDeviation()) + .append(endl); + try { + // No catch for MIAE because actual parameter is valid below + outBuffer.append("median: ").append(getPercentile(50)).append(endl); + } catch (MathIllegalStateException ex) { + outBuffer.append("median: unavailable").append(endl); + } + outBuffer.append("skewness: ").append(getSkewness()).append(endl); + outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl); + return outBuffer.toString(); + } + + /** + * Apply the given statistic to the data associated with this set of statistics. + * @param stat the statistic to apply + * @return the computed value of the statistic. + */ + public double apply(UnivariateStatistic stat) { + // No try-catch or advertised exception here because arguments are guaranteed valid + return eDA.compute(stat); + } + + // Implementation getters and setter + + /** + * Returns the currently configured mean implementation. 
+ * + * @return the UnivariateStatistic implementing the mean + * @since 1.2 + */ + public synchronized UnivariateStatistic getMeanImpl() { + return meanImpl; + } + + /** + * <p>Sets the implementation for the mean.</p> + * + * @param meanImpl the UnivariateStatistic instance to use + * for computing the mean + * @since 1.2 + */ + public synchronized void setMeanImpl(UnivariateStatistic meanImpl) { + this.meanImpl = meanImpl; + } + + /** + * Returns the currently configured geometric mean implementation. + * + * @return the UnivariateStatistic implementing the geometric mean + * @since 1.2 + */ + public synchronized UnivariateStatistic getGeometricMeanImpl() { + return geometricMeanImpl; + } + + /** + * <p>Sets the implementation for the gemoetric mean.</p> + * + * @param geometricMeanImpl the UnivariateStatistic instance to use + * for computing the geometric mean + * @since 1.2 + */ + public synchronized void setGeometricMeanImpl( + UnivariateStatistic geometricMeanImpl) { + this.geometricMeanImpl = geometricMeanImpl; + } + + /** + * Returns the currently configured kurtosis implementation. + * + * @return the UnivariateStatistic implementing the kurtosis + * @since 1.2 + */ + public synchronized UnivariateStatistic getKurtosisImpl() { + return kurtosisImpl; + } + + /** + * <p>Sets the implementation for the kurtosis.</p> + * + * @param kurtosisImpl the UnivariateStatistic instance to use + * for computing the kurtosis + * @since 1.2 + */ + public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) { + this.kurtosisImpl = kurtosisImpl; + } + + /** + * Returns the currently configured maximum implementation. 
+ * + * @return the UnivariateStatistic implementing the maximum + * @since 1.2 + */ + public synchronized UnivariateStatistic getMaxImpl() { + return maxImpl; + } + + /** + * <p>Sets the implementation for the maximum.</p> + * + * @param maxImpl the UnivariateStatistic instance to use + * for computing the maximum + * @since 1.2 + */ + public synchronized void setMaxImpl(UnivariateStatistic maxImpl) { + this.maxImpl = maxImpl; + } + + /** + * Returns the currently configured minimum implementation. + * + * @return the UnivariateStatistic implementing the minimum + * @since 1.2 + */ + public synchronized UnivariateStatistic getMinImpl() { + return minImpl; + } + + /** + * <p>Sets the implementation for the minimum.</p> + * + * @param minImpl the UnivariateStatistic instance to use + * for computing the minimum + * @since 1.2 + */ + public synchronized void setMinImpl(UnivariateStatistic minImpl) { + this.minImpl = minImpl; + } + + /** + * Returns the currently configured percentile implementation. + * + * @return the UnivariateStatistic implementing the percentile + * @since 1.2 + */ + public synchronized UnivariateStatistic getPercentileImpl() { + return percentileImpl; + } + + /** + * Sets the implementation to be used by {@link #getPercentile(double)}. + * The supplied <code>UnivariateStatistic</code> must provide a + * <code>setQuantile(double)</code> method; otherwise + * <code>IllegalArgumentException</code> is thrown. 
+ * + * @param percentileImpl the percentileImpl to set + * @throws MathIllegalArgumentException if the supplied implementation does not + * provide a <code>setQuantile</code> method + * @since 1.2 + */ + public synchronized void setPercentileImpl(UnivariateStatistic percentileImpl) + throws MathIllegalArgumentException { + try { + percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME, + new Class[] {Double.TYPE}).invoke(percentileImpl, + new Object[] {Double.valueOf(50.0d)}); + } catch (NoSuchMethodException e1) { + throw new MathIllegalArgumentException( + LocalizedFormats.PERCENTILE_IMPLEMENTATION_UNSUPPORTED_METHOD, + percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME); + } catch (IllegalAccessException e2) { + throw new MathIllegalArgumentException( + LocalizedFormats.PERCENTILE_IMPLEMENTATION_CANNOT_ACCESS_METHOD, + SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName()); + } catch (InvocationTargetException e3) { + throw new IllegalArgumentException(e3.getCause()); + } + this.percentileImpl = percentileImpl; + } + + /** + * Returns the currently configured skewness implementation. + * + * @return the UnivariateStatistic implementing the skewness + * @since 1.2 + */ + public synchronized UnivariateStatistic getSkewnessImpl() { + return skewnessImpl; + } + + /** + * <p>Sets the implementation for the skewness.</p> + * + * @param skewnessImpl the UnivariateStatistic instance to use + * for computing the skewness + * @since 1.2 + */ + public synchronized void setSkewnessImpl( + UnivariateStatistic skewnessImpl) { + this.skewnessImpl = skewnessImpl; + } + + /** + * Returns the currently configured variance implementation. 
+ * + * @return the UnivariateStatistic implementing the variance + * @since 1.2 + */ + public synchronized UnivariateStatistic getVarianceImpl() { + return varianceImpl; + } + + /** + * <p>Sets the implementation for the variance.</p> + * + * @param varianceImpl the UnivariateStatistic instance to use + * for computing the variance + * @since 1.2 + */ + public synchronized void setVarianceImpl( + UnivariateStatistic varianceImpl) { + this.varianceImpl = varianceImpl; + } + + /** + * Returns the currently configured sum of squares implementation. + * + * @return the UnivariateStatistic implementing the sum of squares + * @since 1.2 + */ + public synchronized UnivariateStatistic getSumsqImpl() { + return sumsqImpl; + } + + /** + * <p>Sets the implementation for the sum of squares.</p> + * + * @param sumsqImpl the UnivariateStatistic instance to use + * for computing the sum of squares + * @since 1.2 + */ + public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) { + this.sumsqImpl = sumsqImpl; + } + + /** + * Returns the currently configured sum implementation. + * + * @return the UnivariateStatistic implementing the sum + * @since 1.2 + */ + public synchronized UnivariateStatistic getSumImpl() { + return sumImpl; + } + + /** + * <p>Sets the implementation for the sum.</p> + * + * @param sumImpl the UnivariateStatistic instance to use + * for computing the sum + * @since 1.2 + */ + public synchronized void setSumImpl(UnivariateStatistic sumImpl) { + this.sumImpl = sumImpl; + } + + /** + * Returns a copy of this DescriptiveStatistics instance with the same internal state. + * + * @return a copy of this + */ + public DescriptiveStatistics copy() { + DescriptiveStatistics result = new DescriptiveStatistics(); + // No try-catch or advertised exception because parms are guaranteed valid + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source DescriptiveStatistics to copy + * @param dest DescriptiveStatistics to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + // Copy data and window size + dest.eDA = source.eDA.copy(); + dest.windowSize = source.windowSize; + + // Copy implementations + dest.maxImpl = source.maxImpl.copy(); + dest.meanImpl = source.meanImpl.copy(); + dest.minImpl = source.minImpl.copy(); + dest.sumImpl = source.sumImpl.copy(); + dest.varianceImpl = source.varianceImpl.copy(); + dest.sumsqImpl = source.sumsqImpl.copy(); + dest.geometricMeanImpl = source.geometricMeanImpl.copy(); + dest.kurtosisImpl = source.kurtosisImpl; + dest.skewnessImpl = source.skewnessImpl; + dest.percentileImpl = source.percentileImpl; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/MultivariateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/MultivariateSummaryStatistics.java new file mode 100644 index 0000000..3ede26e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/MultivariateSummaryStatistics.java @@ -0,0 +1,635 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import java.io.Serializable; +import java.util.Arrays; + +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.stat.descriptive.moment.GeometricMean; +import org.apache.commons.math3.stat.descriptive.moment.Mean; +import org.apache.commons.math3.stat.descriptive.moment.VectorialCovariance; +import org.apache.commons.math3.stat.descriptive.rank.Max; +import org.apache.commons.math3.stat.descriptive.rank.Min; +import org.apache.commons.math3.stat.descriptive.summary.Sum; +import org.apache.commons.math3.stat.descriptive.summary.SumOfLogs; +import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.math3.util.Precision; +import org.apache.commons.math3.util.FastMath; + +/** + * <p>Computes summary statistics for a stream of n-tuples added using the + * {@link #addValue(double[]) addValue} method. The data values are not stored + * in memory, so this class can be used to compute statistics for very large + * n-tuple streams.</p> + * + * <p>The {@link StorelessUnivariateStatistic} instances used to maintain + * summary state and compute statistics are configurable via setters. 
+ * For example, the default implementation for the mean can be overridden by + * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual + * parameters to these methods must implement the + * {@link StorelessUnivariateStatistic} interface and configuration must be + * completed before <code>addValue</code> is called. No configuration is + * necessary to use the default, commons-math provided implementations.</p> + * + * <p>To compute statistics for a stream of n-tuples, construct a + * MultivariateStatistics instance with dimension n and then use + * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code> + * methods where Xxx is a statistic return an array of <code>double</code> + * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the + * value of the given statistic for data range consisting of the i<sup>th</sup> element of + * each of the input n-tuples. For example, if <code>addValue</code> is called + * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8}, + * <code>getSum</code> will return a three-element array with values + * {0+3+6, 1+4+7, 2+5+8}</p> + * + * <p>Note: This class is not thread-safe. Use + * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple + * threads is required.</p> + * + * @since 1.2 + */ +public class MultivariateSummaryStatistics + implements StatisticalMultivariateSummary, Serializable { + + /** Serialization UID */ + private static final long serialVersionUID = 2271900808994826718L; + + /** Dimension of the data. */ + private int k; + + /** Count of values that have been added */ + private long n = 0; + + /** Sum statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] sumImpl; + + /** Sum of squares statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] sumSqImpl; + + /** Minimum statistic implementation - can be reset by setter. 
*/ + private StorelessUnivariateStatistic[] minImpl; + + /** Maximum statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] maxImpl; + + /** Sum of log statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] sumLogImpl; + + /** Geometric mean statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] geoMeanImpl; + + /** Mean statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic[] meanImpl; + + /** Covariance statistic implementation - cannot be reset. */ + private VectorialCovariance covarianceImpl; + + /** + * Construct a MultivariateSummaryStatistics instance + * @param k dimension of the data + * @param isCovarianceBiasCorrected if true, the unbiased sample + * covariance is computed, otherwise the biased population covariance + * is computed + */ + public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) { + this.k = k; + + sumImpl = new StorelessUnivariateStatistic[k]; + sumSqImpl = new StorelessUnivariateStatistic[k]; + minImpl = new StorelessUnivariateStatistic[k]; + maxImpl = new StorelessUnivariateStatistic[k]; + sumLogImpl = new StorelessUnivariateStatistic[k]; + geoMeanImpl = new StorelessUnivariateStatistic[k]; + meanImpl = new StorelessUnivariateStatistic[k]; + + for (int i = 0; i < k; ++i) { + sumImpl[i] = new Sum(); + sumSqImpl[i] = new SumOfSquares(); + minImpl[i] = new Min(); + maxImpl[i] = new Max(); + sumLogImpl[i] = new SumOfLogs(); + geoMeanImpl[i] = new GeometricMean(); + meanImpl[i] = new Mean(); + } + + covarianceImpl = + new VectorialCovariance(k, isCovarianceBiasCorrected); + + } + + /** + * Add an n-tuple to the data + * + * @param value the n-tuple to add + * @throws DimensionMismatchException if the length of the array + * does not match the one used at construction + */ + public void addValue(double[] value) throws DimensionMismatchException { + 
checkDimension(value.length); + for (int i = 0; i < k; ++i) { + double v = value[i]; + sumImpl[i].increment(v); + sumSqImpl[i].increment(v); + minImpl[i].increment(v); + maxImpl[i].increment(v); + sumLogImpl[i].increment(v); + geoMeanImpl[i].increment(v); + meanImpl[i].increment(v); + } + covarianceImpl.increment(value); + n++; + } + + /** + * Returns the dimension of the data + * @return The dimension of the data + */ + public int getDimension() { + return k; + } + + /** + * Returns the number of available values + * @return The number of available values + */ + public long getN() { + return n; + } + + /** + * Returns an array of the results of a statistic. + * @param stats univariate statistic array + * @return results array + */ + private double[] getResults(StorelessUnivariateStatistic[] stats) { + double[] results = new double[stats.length]; + for (int i = 0; i < results.length; ++i) { + results[i] = stats[i].getResult(); + } + return results; + } + + /** + * Returns an array whose i<sup>th</sup> entry is the sum of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component sums + */ + public double[] getSum() { + return getResults(sumImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the sum of squares of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component sums of squares + */ + public double[] getSumSq() { + return getResults(sumSqImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the sum of logs of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component log sums + */ + public double[] getSumLog() { + return getResults(sumLogImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the mean of the + * i<sup>th</sup> entries of the arrays that have been 
added using + * {@link #addValue(double[])} + * + * @return the array of component means + */ + public double[] getMean() { + return getResults(meanImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the standard deviation of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component standard deviations + */ + public double[] getStandardDeviation() { + double[] stdDev = new double[k]; + if (getN() < 1) { + Arrays.fill(stdDev, Double.NaN); + } else if (getN() < 2) { + Arrays.fill(stdDev, 0.0); + } else { + RealMatrix matrix = covarianceImpl.getResult(); + for (int i = 0; i < k; ++i) { + stdDev[i] = FastMath.sqrt(matrix.getEntry(i, i)); + } + } + return stdDev; + } + + /** + * Returns the covariance matrix of the values that have been added. + * + * @return the covariance matrix + */ + public RealMatrix getCovariance() { + return covarianceImpl.getResult(); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the maximum of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component maxima + */ + public double[] getMax() { + return getResults(maxImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the minimum of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component minima + */ + public double[] getMin() { + return getResults(minImpl); + } + + /** + * Returns an array whose i<sup>th</sup> entry is the geometric mean of the + * i<sup>th</sup> entries of the arrays that have been added using + * {@link #addValue(double[])} + * + * @return the array of component geometric means + */ + public double[] getGeometricMean() { + return getResults(geoMeanImpl); + } + + /** + * Generates a text report displaying + * summary statistics from values that + * have been added. 
+ * @return String with line feeds displaying statistics + */ + @Override + public String toString() { + final String separator = ", "; + final String suffix = System.getProperty("line.separator"); + StringBuilder outBuffer = new StringBuilder(); + outBuffer.append("MultivariateSummaryStatistics:" + suffix); + outBuffer.append("n: " + getN() + suffix); + append(outBuffer, getMin(), "min: ", separator, suffix); + append(outBuffer, getMax(), "max: ", separator, suffix); + append(outBuffer, getMean(), "mean: ", separator, suffix); + append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix); + append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix); + append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix); + append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix); + outBuffer.append("covariance: " + getCovariance().toString() + suffix); + return outBuffer.toString(); + } + + /** + * Append a text representation of an array to a buffer. + * @param buffer buffer to fill + * @param data data array + * @param prefix text prefix + * @param separator elements separator + * @param suffix text suffix + */ + private void append(StringBuilder buffer, double[] data, + String prefix, String separator, String suffix) { + buffer.append(prefix); + for (int i = 0; i < data.length; ++i) { + if (i > 0) { + buffer.append(separator); + } + buffer.append(data[i]); + } + buffer.append(suffix); + } + + /** + * Resets all statistics and storage + */ + public void clear() { + this.n = 0; + for (int i = 0; i < k; ++i) { + minImpl[i].clear(); + maxImpl[i].clear(); + sumImpl[i].clear(); + sumLogImpl[i].clear(); + sumSqImpl[i].clear(); + geoMeanImpl[i].clear(); + meanImpl[i].clear(); + } + covarianceImpl.clear(); + } + + /** + * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code> + * instance and all statistics have the same values as this. 
+ * @param object the object to test equality against. + * @return true if object equals this + */ + @Override + public boolean equals(Object object) { + if (object == this ) { + return true; + } + if (object instanceof MultivariateSummaryStatistics == false) { + return false; + } + MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object; + return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) && + MathArrays.equalsIncludingNaN(stat.getMax(), getMax()) && + MathArrays.equalsIncludingNaN(stat.getMean(), getMean()) && + MathArrays.equalsIncludingNaN(stat.getMin(), getMin()) && + Precision.equalsIncludingNaN(stat.getN(), getN()) && + MathArrays.equalsIncludingNaN(stat.getSum(), getSum()) && + MathArrays.equalsIncludingNaN(stat.getSumSq(), getSumSq()) && + MathArrays.equalsIncludingNaN(stat.getSumLog(), getSumLog()) && + stat.getCovariance().equals( getCovariance()); + } + + /** + * Returns hash code based on values of statistics + * + * @return hash code + */ + @Override + public int hashCode() { + int result = 31 + MathUtils.hash(getGeometricMean()); + result = result * 31 + MathUtils.hash(getGeometricMean()); + result = result * 31 + MathUtils.hash(getMax()); + result = result * 31 + MathUtils.hash(getMean()); + result = result * 31 + MathUtils.hash(getMin()); + result = result * 31 + MathUtils.hash(getN()); + result = result * 31 + MathUtils.hash(getSum()); + result = result * 31 + MathUtils.hash(getSumSq()); + result = result * 31 + MathUtils.hash(getSumLog()); + result = result * 31 + getCovariance().hashCode(); + return result; + } + + // Getters and setters for statistics implementations + /** + * Sets statistics implementations. 
+ * @param newImpl new implementations for statistics + * @param oldImpl old implementations for statistics + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e. if n > 0) + */ + private void setImpl(StorelessUnivariateStatistic[] newImpl, + StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException, + DimensionMismatchException { + checkEmpty(); + checkDimension(newImpl.length); + System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length); + } + + /** + * Returns the currently configured Sum implementation + * + * @return the StorelessUnivariateStatistic implementing the sum + */ + public StorelessUnivariateStatistic[] getSumImpl() { + return sumImpl.clone(); + } + + /** + * <p>Sets the implementation for the Sum.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param sumImpl the StorelessUnivariateStatistic instance to use + * for computing the Sum + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setSumImpl(StorelessUnivariateStatistic[] sumImpl) + throws MathIllegalStateException, DimensionMismatchException { + setImpl(sumImpl, this.sumImpl); + } + + /** + * Returns the currently configured sum of squares implementation + * + * @return the StorelessUnivariateStatistic implementing the sum of squares + */ + public StorelessUnivariateStatistic[] getSumsqImpl() { + return sumSqImpl.clone(); + } + + /** + * <p>Sets the implementation for the sum of squares.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has 
been used to add data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param sumsqImpl the StorelessUnivariateStatistic instance to use + * for computing the sum of squares + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl) + throws MathIllegalStateException, DimensionMismatchException { + setImpl(sumsqImpl, this.sumSqImpl); + } + + /** + * Returns the currently configured minimum implementation + * + * @return the StorelessUnivariateStatistic implementing the minimum + */ + public StorelessUnivariateStatistic[] getMinImpl() { + return minImpl.clone(); + } + + /** + * <p>Sets the implementation for the minimum.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param minImpl the StorelessUnivariateStatistic instance to use + * for computing the minimum + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setMinImpl(StorelessUnivariateStatistic[] minImpl) + throws MathIllegalStateException, DimensionMismatchException { + setImpl(minImpl, this.minImpl); + } + + /** + * Returns the currently configured maximum implementation + * + * @return the StorelessUnivariateStatistic implementing the maximum + */ + public StorelessUnivariateStatistic[] getMaxImpl() { + return maxImpl.clone(); + } + + /** + * <p>Sets the implementation for the maximum.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add data; + * 
otherwise an IllegalStateException will be thrown.</p> + * + * @param maxImpl the StorelessUnivariateStatistic instance to use + * for computing the maximum + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl) + throws MathIllegalStateException, DimensionMismatchException{ + setImpl(maxImpl, this.maxImpl); + } + + /** + * Returns the currently configured sum of logs implementation + * + * @return the StorelessUnivariateStatistic implementing the log sum + */ + public StorelessUnivariateStatistic[] getSumLogImpl() { + return sumLogImpl.clone(); + } + + /** + * <p>Sets the implementation for the sum of logs.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param sumLogImpl the StorelessUnivariateStatistic instance to use + * for computing the log sum + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl) + throws MathIllegalStateException, DimensionMismatchException{ + setImpl(sumLogImpl, this.sumLogImpl); + } + + /** + * Returns the currently configured geometric mean implementation + * + * @return the StorelessUnivariateStatistic implementing the geometric mean + */ + public StorelessUnivariateStatistic[] getGeoMeanImpl() { + return geoMeanImpl.clone(); + } + + /** + * <p>Sets the implementation for the geometric mean.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add 
data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param geoMeanImpl the StorelessUnivariateStatistic instance to use + * for computing the geometric mean + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl) + throws MathIllegalStateException, DimensionMismatchException { + setImpl(geoMeanImpl, this.geoMeanImpl); + } + + /** + * Returns the currently configured mean implementation + * + * @return the StorelessUnivariateStatistic implementing the mean + */ + public StorelessUnivariateStatistic[] getMeanImpl() { + return meanImpl.clone(); + } + + /** + * <p>Sets the implementation for the mean.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #addValue(double[]) addValue} has been used to add data; + * otherwise an IllegalStateException will be thrown.</p> + * + * @param meanImpl the StorelessUnivariateStatistic instance to use + * for computing the mean + * @throws DimensionMismatchException if the array dimension + * does not match the one used at construction + * @throws MathIllegalStateException if data has already been added + * (i.e if n > 0) + */ + public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl) + throws MathIllegalStateException, DimensionMismatchException{ + setImpl(meanImpl, this.meanImpl); + } + + /** + * Throws MathIllegalStateException if the statistic is not empty. + * @throws MathIllegalStateException if n > 0. + */ + private void checkEmpty() throws MathIllegalStateException { + if (n > 0) { + throw new MathIllegalStateException( + LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n); + } + } + + /** + * Throws DimensionMismatchException if dimension != k. 
+ * @param dimension dimension to check + * @throws DimensionMismatchException if dimension != k + */
    private void checkDimension(int dimension) throws DimensionMismatchException {
        if (dimension == k) {
            // Matches the dimension of this instance: nothing to report.
            return;
        }
        throw new DimensionMismatchException(dimension, k);
    }
}
diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalMultivariateSummary.java b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalMultivariateSummary.java new file mode 100644 index 0000000..bfe4deb --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalMultivariateSummary.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.linear.RealMatrix; + +/** + * Reporting interface for basic multivariate statistics.
+ * + * @since 1.2 + */ +public interface StatisticalMultivariateSummary { + + /** + * Returns the dimension of the data + * @return The dimension of the data + */ + int getDimension(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * mean of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component means + */ + double[] getMean(); + + /** + * Returns the covariance of the available values. + * @return The covariance, null if no multivariate sample + * have been added or a zeroed matrix for a single value set. + */ + RealMatrix getCovariance(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * standard deviation of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component standard deviations + */ + double[] getStandardDeviation(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * maximum of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component maxima + */ + double[] getMax(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * minimum of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component minima + */ + double[] getMin(); + + /** + * Returns the number of available values + * @return The number of available values + */ + long getN(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * geometric mean of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component geometric means + */ + double[] getGeometricMean(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * sum of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component sums + */ + double[] 
getSum(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * sum of squares of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component sums of squares + */ + double[] getSumSq(); + + /** + * Returns an array whose i<sup>th</sup> entry is the + * sum of logs of the i<sup>th</sup> entries of the arrays + * that correspond to each multivariate sample + * + * @return the array of component log sums + */ + double[] getSumLog(); + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummary.java b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummary.java new file mode 100644 index 0000000..2f310ac --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummary.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +/** + * Reporting interface for basic univariate statistics. 
+ * + */ +public interface StatisticalSummary { + + /** + * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> + * arithmetic mean </a> of the available values + * @return The mean or Double.NaN if no values have been added. + */ + double getMean(); + /** + * Returns the variance of the available values. + * @return The variance, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + double getVariance(); + /** + * Returns the standard deviation of the available values. + * @return The standard deviation, Double.NaN if no values have been added + * or 0.0 for a single value set. + */ + double getStandardDeviation(); + /** + * Returns the maximum of the available values + * @return The max or Double.NaN if no values have been added. + */ + double getMax(); + /** + * Returns the minimum of the available values + * @return The min or Double.NaN if no values have been added. + */ + double getMin(); + /** + * Returns the number of available values + * @return The number of available values + */ + long getN(); + /** + * Returns the sum of the values that have been added to Univariate. + * @return The sum or Double.NaN if no values have been added + */ + double getSum(); + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummaryValues.java b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummaryValues.java new file mode 100644 index 0000000..e216e9b --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/StatisticalSummaryValues.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import java.io.Serializable; + +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.Precision; + +/** + * Value object representing the results of a univariate statistical summary. + * + */ +public class StatisticalSummaryValues implements Serializable, + StatisticalSummary { + + /** Serialization id */ + private static final long serialVersionUID = -5108854841843722536L; + + /** The sample mean */ + private final double mean; + + /** The sample variance */ + private final double variance; + + /** The number of observations in the sample */ + private final long n; + + /** The maximum value */ + private final double max; + + /** The minimum value */ + private final double min; + + /** The sum of the sample values */ + private final double sum; + + /** + * Constructor + * + * @param mean the sample mean + * @param variance the sample variance + * @param n the number of observations in the sample + * @param max the maximum value + * @param min the minimum value + * @param sum the sum of the values + */ + public StatisticalSummaryValues(double mean, double variance, long n, + double max, double min, double sum) { + super(); + this.mean = mean; + this.variance = variance; + this.n = n; + this.max = max; + this.min = min; + this.sum = sum; + } + + /** + * @return Returns the max. + */ + public double getMax() { + return max; + } + + /** + * @return Returns the mean. 
+ */ + public double getMean() { + return mean; + } + + /** + * @return Returns the min. + */ + public double getMin() { + return min; + } + + /** + * @return Returns the number of values. + */ + public long getN() { + return n; + } + + /** + * @return Returns the sum. + */ + public double getSum() { + return sum; + } + + /** + * @return Returns the standard deviation + */ + public double getStandardDeviation() { + return FastMath.sqrt(variance); + } + + /** + * @return Returns the variance. + */ + public double getVariance() { + return variance; + } + + /** + * Returns true iff <code>object</code> is a + * <code>StatisticalSummaryValues</code> instance and all statistics have + * the same values as this. + * + * @param object the object to test equality against. + * @return true if object equals this + */ + @Override + public boolean equals(Object object) { + if (object == this ) { + return true; + } + if (object instanceof StatisticalSummaryValues == false) { + return false; + } + StatisticalSummaryValues stat = (StatisticalSummaryValues) object; + return Precision.equalsIncludingNaN(stat.getMax(), getMax()) && + Precision.equalsIncludingNaN(stat.getMean(), getMean()) && + Precision.equalsIncludingNaN(stat.getMin(), getMin()) && + Precision.equalsIncludingNaN(stat.getN(), getN()) && + Precision.equalsIncludingNaN(stat.getSum(), getSum()) && + Precision.equalsIncludingNaN(stat.getVariance(), getVariance()); + } + + /** + * Returns hash code based on values of statistics + * + * @return hash code + */ + @Override + public int hashCode() { + int result = 31 + MathUtils.hash(getMax()); + result = result * 31 + MathUtils.hash(getMean()); + result = result * 31 + MathUtils.hash(getMin()); + result = result * 31 + MathUtils.hash(getN()); + result = result * 31 + MathUtils.hash(getSum()); + result = result * 31 + MathUtils.hash(getVariance()); + return result; + } + + /** + * Generates a text report displaying values of statistics. 
+ * Each statistic is displayed on a separate line. + * + * @return String with line feeds displaying statistics + */ + @Override + public String toString() { + StringBuffer outBuffer = new StringBuffer(); + String endl = "\n"; + outBuffer.append("StatisticalSummaryValues:").append(endl); + outBuffer.append("n: ").append(getN()).append(endl); + outBuffer.append("min: ").append(getMin()).append(endl); + outBuffer.append("max: ").append(getMax()).append(endl); + outBuffer.append("mean: ").append(getMean()).append(endl); + outBuffer.append("std dev: ").append(getStandardDeviation()) + .append(endl); + outBuffer.append("variance: ").append(getVariance()).append(endl); + outBuffer.append("sum: ").append(getSum()).append(endl); + return outBuffer.toString(); + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/StorelessUnivariateStatistic.java b/src/main/java/org/apache/commons/math3/stat/descriptive/StorelessUnivariateStatistic.java new file mode 100644 index 0000000..e1c2464 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/StorelessUnivariateStatistic.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; + +/** + * Extends the definition of {@link UnivariateStatistic} with + * {@link #increment} and {@link #incrementAll(double[])} methods for adding + * values and updating internal state. + * <p> + * This interface is designed to be used for calculating statistics that can be + * computed in one pass through the data without storing the full array of + * sample values.</p> + * + */ +public interface StorelessUnivariateStatistic extends UnivariateStatistic { + + /** + * Updates the internal state of the statistic to reflect the addition of the new value. + * @param d the new value. + */ + void increment(double d); + + /** + * Updates the internal state of the statistic to reflect addition of + * all values in the values array. Does not clear the statistic first -- + * i.e., the values are added <strong>incrementally</strong> to the dataset. + * + * @param values array holding the new values to add + * @throws MathIllegalArgumentException if the array is null + */ + void incrementAll(double[] values) throws MathIllegalArgumentException; + + /** + * Updates the internal state of the statistic to reflect addition of + * the values in the designated portion of the values array. Does not + * clear the statistic first -- i.e., the values are added + * <strong>incrementally</strong> to the dataset. + * + * @param values array holding the new values to add + * @param start the array index of the first value to add + * @param length the number of elements to add + * @throws MathIllegalArgumentException if the array is null or the index + */ + void incrementAll(double[] values, int start, int length) throws MathIllegalArgumentException; + + /** + * Returns the current value of the Statistic. + * @return value of the statistic, <code>Double.NaN</code> if it + * has been cleared or just instantiated. 
+ */ + double getResult(); + + /** + * Returns the number of values that have been added. + * @return the number of values. + */ + long getN(); + + /** + * Clears the internal state of the Statistic + */ + void clear(); + + /** + * Returns a copy of the statistic with the same internal state. + * + * @return a copy of the statistic + */ + StorelessUnivariateStatistic copy(); + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/SummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/SummaryStatistics.java new file mode 100644 index 0000000..62fee80 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/SummaryStatistics.java @@ -0,0 +1,765 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.moment.GeometricMean; +import org.apache.commons.math3.stat.descriptive.moment.Mean; +import org.apache.commons.math3.stat.descriptive.moment.SecondMoment; +import org.apache.commons.math3.stat.descriptive.moment.Variance; +import org.apache.commons.math3.stat.descriptive.rank.Max; +import org.apache.commons.math3.stat.descriptive.rank.Min; +import org.apache.commons.math3.stat.descriptive.summary.Sum; +import org.apache.commons.math3.stat.descriptive.summary.SumOfLogs; +import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.Precision; +import org.apache.commons.math3.util.FastMath; + +/** + * <p> + * Computes summary statistics for a stream of data values added using the + * {@link #addValue(double) addValue} method. The data values are not stored in + * memory, so this class can be used to compute statistics for very large data + * streams. + * </p> + * <p> + * The {@link StorelessUnivariateStatistic} instances used to maintain summary + * state and compute statistics are configurable via setters. For example, the + * default implementation for the variance can be overridden by calling + * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to + * these methods must implement the {@link StorelessUnivariateStatistic} + * interface and configuration must be completed before <code>addValue</code> + * is called. No configuration is necessary to use the default, commons-math + * provided implementations. + * </p> + * <p> + * Note: This class is not thread-safe. 
Use + * {@link SynchronizedSummaryStatistics} if concurrent access from multiple + * threads is required. + * </p> + */ +public class SummaryStatistics implements StatisticalSummary, Serializable { + + /** Serialization UID */ + private static final long serialVersionUID = -2021321786743555871L; + + /** count of values that have been added */ + private long n = 0; + + /** SecondMoment is used to compute the mean and variance */ + private SecondMoment secondMoment = new SecondMoment(); + + /** sum of values that have been added */ + private Sum sum = new Sum(); + + /** sum of the square of each value that has been added */ + private SumOfSquares sumsq = new SumOfSquares(); + + /** min of values that have been added */ + private Min min = new Min(); + + /** max of values that have been added */ + private Max max = new Max(); + + /** sumLog of values that have been added */ + private SumOfLogs sumLog = new SumOfLogs(); + + /** geoMean of values that have been added */ + private GeometricMean geoMean = new GeometricMean(sumLog); + + /** mean of values that have been added */ + private Mean mean = new Mean(secondMoment); + + /** variance of values that have been added */ + private Variance variance = new Variance(secondMoment); + + /** Sum statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic sumImpl = sum; + + /** Sum of squares statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic sumsqImpl = sumsq; + + /** Minimum statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic minImpl = min; + + /** Maximum statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic maxImpl = max; + + /** Sum of log statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic sumLogImpl = sumLog; + + /** Geometric mean statistic implementation - can be reset by setter. 
*/ + private StorelessUnivariateStatistic geoMeanImpl = geoMean; + + /** Mean statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic meanImpl = mean; + + /** Variance statistic implementation - can be reset by setter. */ + private StorelessUnivariateStatistic varianceImpl = variance; + + /** + * Construct a SummaryStatistics instance + */ + public SummaryStatistics() { + } + + /** + * A copy constructor. Creates a deep-copy of the {@code original}. + * + * @param original the {@code SummaryStatistics} instance to copy + * @throws NullArgumentException if original is null + */ + public SummaryStatistics(SummaryStatistics original) throws NullArgumentException { + copy(original, this); + } + + /** + * Return a {@link StatisticalSummaryValues} instance reporting current + * statistics. + * @return Current values of statistics + */ + public StatisticalSummary getSummary() { + return new StatisticalSummaryValues(getMean(), getVariance(), getN(), + getMax(), getMin(), getSum()); + } + + /** + * Add a value to the data + * @param value the value to add + */ + public void addValue(double value) { + sumImpl.increment(value); + sumsqImpl.increment(value); + minImpl.increment(value); + maxImpl.increment(value); + sumLogImpl.increment(value); + secondMoment.increment(value); + // If mean, variance or geomean have been overridden, + // need to increment these + if (meanImpl != mean) { + meanImpl.increment(value); + } + if (varianceImpl != variance) { + varianceImpl.increment(value); + } + if (geoMeanImpl != geoMean) { + geoMeanImpl.increment(value); + } + n++; + } + + /** + * Returns the number of available values + * @return The number of available values + */ + public long getN() { + return n; + } + + /** + * Returns the sum of the values that have been added + * @return The sum or <code>Double.NaN</code> if no values have been added + */ + public double getSum() { + return sumImpl.getResult(); + } + + /** + * Returns the sum of the 
squares of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return The sum of squares + */ + public double getSumsq() { + return sumsqImpl.getResult(); + } + + /** + * Returns the mean of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the mean + */ + public double getMean() { + return meanImpl.getResult(); + } + + /** + * Returns the standard deviation of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the standard deviation + */ + public double getStandardDeviation() { + double stdDev = Double.NaN; + if (getN() > 0) { + if (getN() > 1) { + stdDev = FastMath.sqrt(getVariance()); + } else { + stdDev = 0.0; + } + } + return stdDev; + } + + /** + * Returns the quadratic mean, a.k.a. + * <a href="http://mathworld.wolfram.com/Root-Mean-Square.html"> + * root-mean-square</a> of the available values + * @return The quadratic mean or {@code Double.NaN} if no values + * have been added. + */ + public double getQuadraticMean() { + final long size = getN(); + return size > 0 ? FastMath.sqrt(getSumsq() / size) : Double.NaN; + } + + /** + * Returns the (sample) variance of the available values. + * + * <p>This method returns the bias-corrected sample variance (using {@code n - 1} in + * the denominator). Use {@link #getPopulationVariance()} for the non-bias-corrected + * population variance.</p> + * + * <p>Double.NaN is returned if no values have been added.</p> + * + * @return the variance + */ + public double getVariance() { + return varianceImpl.getResult(); + } + + /** + * Returns the <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance"> + * population variance</a> of the values that have been added. 
+ * + * <p>Double.NaN is returned if no values have been added.</p> + * + * @return the population variance + */ + public double getPopulationVariance() { + Variance populationVariance = new Variance(secondMoment); + populationVariance.setBiasCorrected(false); + return populationVariance.getResult(); + } + + /** + * Returns the maximum of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the maximum + */ + public double getMax() { + return maxImpl.getResult(); + } + + /** + * Returns the minimum of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the minimum + */ + public double getMin() { + return minImpl.getResult(); + } + + /** + * Returns the geometric mean of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the geometric mean + */ + public double getGeometricMean() { + return geoMeanImpl.getResult(); + } + + /** + * Returns the sum of the logs of the values that have been added. + * <p> + * Double.NaN is returned if no values have been added. + * </p> + * @return the sum of logs + * @since 1.2 + */ + public double getSumOfLogs() { + return sumLogImpl.getResult(); + } + + /** + * Returns a statistic related to the Second Central Moment. Specifically, + * what is returned is the sum of squared deviations from the sample mean + * among the values that have been added. + * <p> + * Returns <code>Double.NaN</code> if no data values have been added and + * returns <code>0</code> if there is just one value in the data set.</p> + * <p> + * @return second central moment statistic + * @since 2.0 + */ + public double getSecondMoment() { + return secondMoment.getResult(); + } + + /** + * Generates a text report displaying summary statistics from values that + * have been added. 
+ * @return String with line feeds displaying statistics + * @since 1.2 + */ + @Override + public String toString() { + StringBuilder outBuffer = new StringBuilder(); + String endl = "\n"; + outBuffer.append("SummaryStatistics:").append(endl); + outBuffer.append("n: ").append(getN()).append(endl); + outBuffer.append("min: ").append(getMin()).append(endl); + outBuffer.append("max: ").append(getMax()).append(endl); + outBuffer.append("sum: ").append(getSum()).append(endl); + outBuffer.append("mean: ").append(getMean()).append(endl); + outBuffer.append("geometric mean: ").append(getGeometricMean()) + .append(endl); + outBuffer.append("variance: ").append(getVariance()).append(endl); + outBuffer.append("population variance: ").append(getPopulationVariance()).append(endl); + outBuffer.append("second moment: ").append(getSecondMoment()).append(endl); + outBuffer.append("sum of squares: ").append(getSumsq()).append(endl); + outBuffer.append("standard deviation: ").append(getStandardDeviation()) + .append(endl); + outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl); + return outBuffer.toString(); + } + + /** + * Resets all statistics and storage + */ + public void clear() { + this.n = 0; + minImpl.clear(); + maxImpl.clear(); + sumImpl.clear(); + sumLogImpl.clear(); + sumsqImpl.clear(); + geoMeanImpl.clear(); + secondMoment.clear(); + if (meanImpl != mean) { + meanImpl.clear(); + } + if (varianceImpl != variance) { + varianceImpl.clear(); + } + } + + /** + * Returns true iff <code>object</code> is a + * <code>SummaryStatistics</code> instance and all statistics have the + * same values as this. + * @param object the object to test equality against. 
+ * @return true if object equals this + */ + @Override + public boolean equals(Object object) { + if (object == this) { + return true; + } + if (object instanceof SummaryStatistics == false) { + return false; + } + SummaryStatistics stat = (SummaryStatistics)object; + return Precision.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) && + Precision.equalsIncludingNaN(stat.getMax(), getMax()) && + Precision.equalsIncludingNaN(stat.getMean(), getMean()) && + Precision.equalsIncludingNaN(stat.getMin(), getMin()) && + Precision.equalsIncludingNaN(stat.getN(), getN()) && + Precision.equalsIncludingNaN(stat.getSum(), getSum()) && + Precision.equalsIncludingNaN(stat.getSumsq(), getSumsq()) && + Precision.equalsIncludingNaN(stat.getVariance(), getVariance()); + } + + /** + * Returns hash code based on values of statistics + * @return hash code + */ + @Override + public int hashCode() { + int result = 31 + MathUtils.hash(getGeometricMean()); + result = result * 31 + MathUtils.hash(getGeometricMean()); + result = result * 31 + MathUtils.hash(getMax()); + result = result * 31 + MathUtils.hash(getMean()); + result = result * 31 + MathUtils.hash(getMin()); + result = result * 31 + MathUtils.hash(getN()); + result = result * 31 + MathUtils.hash(getSum()); + result = result * 31 + MathUtils.hash(getSumsq()); + result = result * 31 + MathUtils.hash(getVariance()); + return result; + } + + // Getters and setters for statistics implementations + /** + * Returns the currently configured Sum implementation + * @return the StorelessUnivariateStatistic implementing the sum + * @since 1.2 + */ + public StorelessUnivariateStatistic getSumImpl() { + return sumImpl; + } + + /** + * <p> + * Sets the implementation for the Sum. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. 
+ * </p> + * @param sumImpl the StorelessUnivariateStatistic instance to use for + * computing the Sum + * @throws MathIllegalStateException if data has already been added (i.e if n >0) + * @since 1.2 + */ + public void setSumImpl(StorelessUnivariateStatistic sumImpl) + throws MathIllegalStateException { + checkEmpty(); + this.sumImpl = sumImpl; + } + + /** + * Returns the currently configured sum of squares implementation + * @return the StorelessUnivariateStatistic implementing the sum of squares + * @since 1.2 + */ + public StorelessUnivariateStatistic getSumsqImpl() { + return sumsqImpl; + } + + /** + * <p> + * Sets the implementation for the sum of squares. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. + * </p> + * @param sumsqImpl the StorelessUnivariateStatistic instance to use for + * computing the sum of squares + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) + throws MathIllegalStateException { + checkEmpty(); + this.sumsqImpl = sumsqImpl; + } + + /** + * Returns the currently configured minimum implementation + * @return the StorelessUnivariateStatistic implementing the minimum + * @since 1.2 + */ + public StorelessUnivariateStatistic getMinImpl() { + return minImpl; + } + + /** + * <p> + * Sets the implementation for the minimum. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. 
+ * </p> + * @param minImpl the StorelessUnivariateStatistic instance to use for + * computing the minimum + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setMinImpl(StorelessUnivariateStatistic minImpl) + throws MathIllegalStateException { + checkEmpty(); + this.minImpl = minImpl; + } + + /** + * Returns the currently configured maximum implementation + * @return the StorelessUnivariateStatistic implementing the maximum + * @since 1.2 + */ + public StorelessUnivariateStatistic getMaxImpl() { + return maxImpl; + } + + /** + * <p> + * Sets the implementation for the maximum. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. + * </p> + * @param maxImpl the StorelessUnivariateStatistic instance to use for + * computing the maximum + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setMaxImpl(StorelessUnivariateStatistic maxImpl) + throws MathIllegalStateException { + checkEmpty(); + this.maxImpl = maxImpl; + } + + /** + * Returns the currently configured sum of logs implementation + * @return the StorelessUnivariateStatistic implementing the log sum + * @since 1.2 + */ + public StorelessUnivariateStatistic getSumLogImpl() { + return sumLogImpl; + } + + /** + * <p> + * Sets the implementation for the sum of logs. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. 
+ * </p> + * @param sumLogImpl the StorelessUnivariateStatistic instance to use for + * computing the log sum + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) + throws MathIllegalStateException { + checkEmpty(); + this.sumLogImpl = sumLogImpl; + geoMean.setSumLogImpl(sumLogImpl); + } + + /** + * Returns the currently configured geometric mean implementation + * @return the StorelessUnivariateStatistic implementing the geometric mean + * @since 1.2 + */ + public StorelessUnivariateStatistic getGeoMeanImpl() { + return geoMeanImpl; + } + + /** + * <p> + * Sets the implementation for the geometric mean. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. + * </p> + * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for + * computing the geometric mean + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) + throws MathIllegalStateException { + checkEmpty(); + this.geoMeanImpl = geoMeanImpl; + } + + /** + * Returns the currently configured mean implementation + * @return the StorelessUnivariateStatistic implementing the mean + * @since 1.2 + */ + public StorelessUnivariateStatistic getMeanImpl() { + return meanImpl; + } + + /** + * <p> + * Sets the implementation for the mean. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. 
+ * </p> + * @param meanImpl the StorelessUnivariateStatistic instance to use for + * computing the mean + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setMeanImpl(StorelessUnivariateStatistic meanImpl) + throws MathIllegalStateException { + checkEmpty(); + this.meanImpl = meanImpl; + } + + /** + * Returns the currently configured variance implementation + * @return the StorelessUnivariateStatistic implementing the variance + * @since 1.2 + */ + public StorelessUnivariateStatistic getVarianceImpl() { + return varianceImpl; + } + + /** + * <p> + * Sets the implementation for the variance. + * </p> + * <p> + * This method cannot be activated after data has been added - i.e., + * after {@link #addValue(double) addValue} has been used to add data. + * If it is activated after data has been added, an IllegalStateException + * will be thrown. + * </p> + * @param varianceImpl the StorelessUnivariateStatistic instance to use for + * computing the variance + * @throws MathIllegalStateException if data has already been added (i.e if n > 0) + * @since 1.2 + */ + public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) + throws MathIllegalStateException { + checkEmpty(); + this.varianceImpl = varianceImpl; + } + + /** + * Throws IllegalStateException if n > 0. + * @throws MathIllegalStateException if data has been added + */ + private void checkEmpty() throws MathIllegalStateException { + if (n > 0) { + throw new MathIllegalStateException( + LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n); + } + } + + /** + * Returns a copy of this SummaryStatistics instance with the same internal state. + * + * @return a copy of this + */ + public SummaryStatistics copy() { + SummaryStatistics result = new SummaryStatistics(); + // No try-catch or advertised exception because arguments are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source SummaryStatistics to copy + * @param dest SummaryStatistics to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SummaryStatistics source, SummaryStatistics dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.maxImpl = source.maxImpl.copy(); + dest.minImpl = source.minImpl.copy(); + dest.sumImpl = source.sumImpl.copy(); + dest.sumLogImpl = source.sumLogImpl.copy(); + dest.sumsqImpl = source.sumsqImpl.copy(); + dest.secondMoment = source.secondMoment.copy(); + dest.n = source.n; + + // Keep commons-math supplied statistics with embedded moments in synch + if (source.getVarianceImpl() instanceof Variance) { + dest.varianceImpl = new Variance(dest.secondMoment); + } else { + dest.varianceImpl = source.varianceImpl.copy(); + } + if (source.meanImpl instanceof Mean) { + dest.meanImpl = new Mean(dest.secondMoment); + } else { + dest.meanImpl = source.meanImpl.copy(); + } + if (source.getGeoMeanImpl() instanceof GeometricMean) { + dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl); + } else { + dest.geoMeanImpl = source.geoMeanImpl.copy(); + } + + // Make sure that if stat == statImpl in source, same + // holds in dest; otherwise copy stat + if (source.geoMean == source.geoMeanImpl) { + dest.geoMean = (GeometricMean) dest.geoMeanImpl; + } else { + GeometricMean.copy(source.geoMean, dest.geoMean); + } + if (source.max == source.maxImpl) { + dest.max = (Max) dest.maxImpl; + } else { + Max.copy(source.max, dest.max); + } + if (source.mean == source.meanImpl) { + dest.mean = (Mean) dest.meanImpl; + } else { + Mean.copy(source.mean, dest.mean); + } + if (source.min == source.minImpl) { + dest.min = (Min) dest.minImpl; + } else { + Min.copy(source.min, dest.min); + } + if (source.sum == source.sumImpl) { + dest.sum = (Sum) dest.sumImpl; + } else { + Sum.copy(source.sum, dest.sum); 
+ } + if (source.variance == source.varianceImpl) { + dest.variance = (Variance) dest.varianceImpl; + } else { + Variance.copy(source.variance, dest.variance); + } + if (source.sumLog == source.sumLogImpl) { + dest.sumLog = (SumOfLogs) dest.sumLogImpl; + } else { + SumOfLogs.copy(source.sumLog, dest.sumLog); + } + if (source.sumsq == source.sumsqImpl) { + dest.sumsq = (SumOfSquares) dest.sumsqImpl; + } else { + SumOfSquares.copy(source.sumsq, dest.sumsq); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedDescriptiveStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedDescriptiveStatistics.java new file mode 100644 index 0000000..270e4aa --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedDescriptiveStatistics.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; + +/** + * Implementation of + * {@link org.apache.commons.math3.stat.descriptive.DescriptiveStatistics} that + * is safe to use in a multithreaded environment. Multiple threads can safely + * operate on a single instance without causing runtime exceptions due to race + * conditions. In effect, this implementation makes modification and access + * methods atomic operations for a single instance. That is to say, as one + * thread is computing a statistic from the instance, no other thread can modify + * the instance nor compute another statistic. + * + * @since 1.2 + */ +public class SynchronizedDescriptiveStatistics extends DescriptiveStatistics { + + /** Serialization UID */ + private static final long serialVersionUID = 1L; + + /** + * Construct an instance with infinite window + */ + public SynchronizedDescriptiveStatistics() { + // no try-catch or advertized IAE because arg is valid + this(INFINITE_WINDOW); + } + + /** + * Construct an instance with finite window + * @param window the finite window size. + * @throws MathIllegalArgumentException if window size is less than 1 but + * not equal to {@link #INFINITE_WINDOW} + */ + public SynchronizedDescriptiveStatistics(int window) throws MathIllegalArgumentException { + super(window); + } + + /** + * A copy constructor. Creates a deep-copy of the {@code original}. 
+ * + * @param original the {@code SynchronizedDescriptiveStatistics} instance to copy + * @throws NullArgumentException if original is null + */ + public SynchronizedDescriptiveStatistics(SynchronizedDescriptiveStatistics original) + throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void addValue(double v) { + super.addValue(v); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double apply(UnivariateStatistic stat) { + return super.apply(stat); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void clear() { + super.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getElement(int index) { + return super.getElement(index); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized long getN() { + return super.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getStandardDeviation() { + return super.getStandardDeviation(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getQuadraticMean() { + return super.getQuadraticMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getValues() { + return super.getValues(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized int getWindowSize() { + return super.getWindowSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setWindowSize(int windowSize) throws MathIllegalArgumentException { + super.setWindowSize(windowSize); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized String toString() { + return super.toString(); + } + + /** + * Returns a copy of this SynchronizedDescriptiveStatistics instance with the + * same internal state. 
+ * + * @return a copy of this + */ + @Override + public synchronized SynchronizedDescriptiveStatistics copy() { + SynchronizedDescriptiveStatistics result = + new SynchronizedDescriptiveStatistics(); + // No try-catch or advertised exception because arguments are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * <p>Acquires synchronization lock on source, then dest before copying.</p> + * + * @param source SynchronizedDescriptiveStatistics to copy + * @param dest SynchronizedDescriptiveStatistics to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SynchronizedDescriptiveStatistics source, + SynchronizedDescriptiveStatistics dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + synchronized (source) { + synchronized (dest) { + DescriptiveStatistics.copy(source, dest); + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java new file mode 100644 index 0000000..889eb3a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedMultivariateSummaryStatistics.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.linear.RealMatrix; + +/** + * Implementation of + * {@link org.apache.commons.math3.stat.descriptive.MultivariateSummaryStatistics} that + * is safe to use in a multithreaded environment. Multiple threads can safely + * operate on a single instance without causing runtime exceptions due to race + * conditions. In effect, this implementation makes modification and access + * methods atomic operations for a single instance. That is to say, as one + * thread is computing a statistic from the instance, no other thread can modify + * the instance nor compute another statistic. 
+ * @since 1.2 + */ +public class SynchronizedMultivariateSummaryStatistics + extends MultivariateSummaryStatistics { + + /** Serialization UID */ + private static final long serialVersionUID = 7099834153347155363L; + + /** + * Construct a SynchronizedMultivariateSummaryStatistics instance + * @param k dimension of the data + * @param isCovarianceBiasCorrected if true, the unbiased sample + * covariance is computed, otherwise the biased population covariance + * is computed + */ + public SynchronizedMultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) { + super(k, isCovarianceBiasCorrected); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void addValue(double[] value) throws DimensionMismatchException { + super.addValue(value); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized int getDimension() { + return super.getDimension(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized long getN() { + return super.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getSum() { + return super.getSum(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getSumSq() { + return super.getSumSq(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getSumLog() { + return super.getSumLog(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getMean() { + return super.getMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getStandardDeviation() { + return super.getStandardDeviation(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized RealMatrix getCovariance() { + return super.getCovariance(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getMax() { + return super.getMax(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double[] getMin() { + return super.getMin(); + } + + /** + * 
{@inheritDoc} + */ + @Override + public synchronized double[] getGeometricMean() { + return super.getGeometricMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized String toString() { + return super.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void clear() { + super.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized boolean equals(Object object) { + return super.equals(object); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized int hashCode() { + return super.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getSumImpl() { + return super.getSumImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumImpl(StorelessUnivariateStatistic[] sumImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setSumImpl(sumImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getSumsqImpl() { + return super.getSumsqImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setSumsqImpl(sumsqImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getMinImpl() { + return super.getMinImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMinImpl(StorelessUnivariateStatistic[] minImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setMinImpl(minImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getMaxImpl() { + return super.getMaxImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMaxImpl(StorelessUnivariateStatistic[] maxImpl) + throws DimensionMismatchException, 
MathIllegalStateException{ + super.setMaxImpl(maxImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getSumLogImpl() { + return super.getSumLogImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setSumLogImpl(sumLogImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getGeoMeanImpl() { + return super.getGeoMeanImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setGeoMeanImpl(geoMeanImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic[] getMeanImpl() { + return super.getMeanImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMeanImpl(StorelessUnivariateStatistic[] meanImpl) + throws DimensionMismatchException, MathIllegalStateException { + super.setMeanImpl(meanImpl); + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedSummaryStatistics.java new file mode 100644 index 0000000..7eaf9ac --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/SynchronizedSummaryStatistics.java @@ -0,0 +1,366 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; + +/** + * Implementation of + * {@link org.apache.commons.math3.stat.descriptive.SummaryStatistics} that + * is safe to use in a multithreaded environment. Multiple threads can safely + * operate on a single instance without causing runtime exceptions due to race + * conditions. In effect, this implementation makes modification and access + * methods atomic operations for a single instance. That is to say, as one + * thread is computing a statistic from the instance, no other thread can modify + * the instance nor compute another statistic. + * + * @since 1.2 + */ +public class SynchronizedSummaryStatistics extends SummaryStatistics { + + /** Serialization UID */ + private static final long serialVersionUID = 1909861009042253704L; + + /** + * Construct a SynchronizedSummaryStatistics instance + */ + public SynchronizedSummaryStatistics() { + super(); + } + + /** + * A copy constructor. Creates a deep-copy of the {@code original}. 
+ * + * @param original the {@code SynchronizedSummaryStatistics} instance to copy + * @throws NullArgumentException if original is null + */ + public SynchronizedSummaryStatistics(SynchronizedSummaryStatistics original) + throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StatisticalSummary getSummary() { + return super.getSummary(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void addValue(double value) { + super.addValue(value); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized long getN() { + return super.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getSum() { + return super.getSum(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getSumsq() { + return super.getSumsq(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getMean() { + return super.getMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getStandardDeviation() { + return super.getStandardDeviation(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getQuadraticMean() { + return super.getQuadraticMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getVariance() { + return super.getVariance(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getPopulationVariance() { + return super.getPopulationVariance(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getMax() { + return super.getMax(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getMin() { + return super.getMin(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized double getGeometricMean() { + return super.getGeometricMean(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized String toString() { + return super.toString(); + } + + 
/** + * {@inheritDoc} + */ + @Override + public synchronized void clear() { + super.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized boolean equals(Object object) { + return super.equals(object); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized int hashCode() { + return super.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getSumImpl() { + return super.getSumImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumImpl(StorelessUnivariateStatistic sumImpl) + throws MathIllegalStateException { + super.setSumImpl(sumImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getSumsqImpl() { + return super.getSumsqImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) + throws MathIllegalStateException { + super.setSumsqImpl(sumsqImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getMinImpl() { + return super.getMinImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMinImpl(StorelessUnivariateStatistic minImpl) + throws MathIllegalStateException { + super.setMinImpl(minImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getMaxImpl() { + return super.getMaxImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMaxImpl(StorelessUnivariateStatistic maxImpl) + throws MathIllegalStateException { + super.setMaxImpl(maxImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getSumLogImpl() { + return super.getSumLogImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) + throws MathIllegalStateException { + 
super.setSumLogImpl(sumLogImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getGeoMeanImpl() { + return super.getGeoMeanImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) + throws MathIllegalStateException { + super.setGeoMeanImpl(geoMeanImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getMeanImpl() { + return super.getMeanImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setMeanImpl(StorelessUnivariateStatistic meanImpl) + throws MathIllegalStateException { + super.setMeanImpl(meanImpl); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized StorelessUnivariateStatistic getVarianceImpl() { + return super.getVarianceImpl(); + } + + /** + * {@inheritDoc} + */ + @Override + public synchronized void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) + throws MathIllegalStateException { + super.setVarianceImpl(varianceImpl); + } + + /** + * Returns a copy of this SynchronizedSummaryStatistics instance with the + * same internal state. + * + * @return a copy of this + */ + @Override + public synchronized SynchronizedSummaryStatistics copy() { + SynchronizedSummaryStatistics result = + new SynchronizedSummaryStatistics(); + // No try-catch or advertised exception because arguments are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * <p>Acquires the monitors of both source and dest before copying. The + * monitors are taken in a globally consistent order (by identity hash code, + * with a shared tie-breaker lock when the hash codes are equal), so that + * concurrent {@code copy(a, b)} and {@code copy(b, a)} calls cannot deadlock.</p> + * + * @param source SynchronizedSummaryStatistics to copy + * @param dest SynchronizedSummaryStatistics to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SynchronizedSummaryStatistics source, + SynchronizedSummaryStatistics dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + final int sourceHash = System.identityHashCode(source); + final int destHash = System.identityHashCode(dest); + if (sourceHash < destHash) { + copyLocked(source, dest, source, dest); + } else if (sourceHash > destHash) { + copyLocked(dest, source, source, dest); + } else { + // Same instance or an identity-hash collision: serialize on the + // tie-breaker so two such calls cannot nest the monitors in opposite order. + synchronized (COPY_TIE_LOCK) { + copyLocked(source, dest, source, dest); + } + } + } + + /** Tie-breaker lock used by copy when both identity hash codes are equal. */ + private static final Object COPY_TIE_LOCK = new Object(); + + /** + * Copies source to dest while holding both monitors, acquired in the given order. + * + * @param first monitor to acquire first + * @param second monitor to acquire second + * @param source instance to copy + * @param dest instance to copy to + */ + private static void copyLocked(Object first, Object second, + SynchronizedSummaryStatistics source, + SynchronizedSummaryStatistics dest) { + synchronized (first) { + synchronized (second) { + SummaryStatistics.copy(source, dest); + } + } + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/UnivariateStatistic.java b/src/main/java/org/apache/commons/math3/stat/descriptive/UnivariateStatistic.java new file mode 100644 index 0000000..5d6c9fe --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/UnivariateStatistic.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.util.MathArrays; + + +/** + * Base interface implemented by all statistics. + * + */ +public interface UnivariateStatistic extends MathArrays.Function { + /** + * Returns the result of evaluating the statistic over the input array. + * + * @param values input array + * @return the value of the statistic applied to the input array + * @throws MathIllegalArgumentException if values is null + */ + double evaluate(double[] values) throws MathIllegalArgumentException; + + /** + * Returns the result of evaluating the statistic over the specified entries + * in the input array. + * + * @param values the input array + * @param begin the index of the first element to include + * @param length the number of elements to include + * @return the value of the statistic applied to the included array entries + * @throws MathIllegalArgumentException if values is null or the indices are invalid + */ + double evaluate(double[] values, int begin, int length) throws MathIllegalArgumentException; + + /** + * Returns a copy of the statistic with the same internal state. + * + * @return a copy of the statistic + */ + UnivariateStatistic copy(); +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/WeightedEvaluation.java b/src/main/java/org/apache/commons/math3/stat/descriptive/WeightedEvaluation.java new file mode 100644 index 0000000..01693dc --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/WeightedEvaluation.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; + +/** + * Weighted evaluation for statistics. + * + * @since 2.1 + */ +public interface WeightedEvaluation { + + /** + * Returns the result of evaluating the statistic over the input array, + * using the supplied weights. + * + * @param values input array + * @param weights array of weights + * @return the value of the weighted statistic applied to the input array + * @throws MathIllegalArgumentException if either array is null, lengths + * do not match, weights contain NaN, negative or infinite values, or + * the weights do not include at least one positive value + */ + double evaluate(double[] values, double[] weights) throws MathIllegalArgumentException; + + /** + * Returns the result of evaluating the statistic over the specified entries + * in the input array, using corresponding entries in the supplied weights array. 
+ * + * @param values the input array + * @param weights array of weights + * @param begin the index of the first element to include + * @param length the number of elements to include + * @return the value of the weighted statistic applied to the included array entries + * @throws MathIllegalArgumentException if either array is null, lengths + * do not match, indices are invalid, weights contain NaN, negative or + * infinite values, or the weights do not include at least one positive value + */ + double evaluate(double[] values, double[] weights, int begin, int length) + throws MathIllegalArgumentException; + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FirstMoment.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FirstMoment.java new file mode 100644 index 0000000..c153724 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FirstMoment.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes the first moment (arithmetic mean). Uses the definitional formula: + * <p> + * mean = sum(x_i) / n </p> + * <p> + * where <code>n</code> is the number of observations. </p> + * <p> + * To limit numeric errors, the value of the statistic is computed using the + * following recursive updating algorithm: </p> + * <p> + * <ol> + * <li>Initialize <code>m = </code> the first value</li> + * <li>For each additional value, update using <br> + * <code>m = m + (new value - m) / (number of observations)</code></li> + * </ol></p> + * <p> + * Returns <code>Double.NaN</code> if the dataset is empty. Note that + * Double.NaN may also be returned if the input includes NaN and / or infinite + * values.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +class FirstMoment extends AbstractStorelessUnivariateStatistic + implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 6112755307178490473L; + + + /** Count of values that have been added */ + protected long n; + + /** First moment of values that have been added */ + protected double m1; + + /** + * Deviation of most recently added value from previous first moment. + * Retained to prevent repeated computation in higher order moments. + */ + protected double dev; + + /** + * Deviation of most recently added value from previous first moment, + * normalized by previous sample size. 
Retained to prevent repeated + * computation in higher order moments + */ + protected double nDev; + + /** + * Create a FirstMoment instance + */ + FirstMoment() { + n = 0; + m1 = Double.NaN; + dev = Double.NaN; + nDev = Double.NaN; + } + + /** + * Copy constructor, creates a new {@code FirstMoment} identical + * to the {@code original} + * + * @param original the {@code FirstMoment} instance to copy + * @throws NullArgumentException if original is null + */ + FirstMoment(FirstMoment original) throws NullArgumentException { + super(); + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + if (n == 0) { + m1 = 0.0; + } + n++; + double n0 = n; + dev = d - m1; + nDev = dev / n0; + m1 += nDev; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + m1 = Double.NaN; + n = 0; + dev = Double.NaN; + nDev = Double.NaN; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return m1; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * {@inheritDoc} + */ + @Override + public FirstMoment copy() { + FirstMoment result = new FirstMoment(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source FirstMoment to copy + * @param dest FirstMoment to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(FirstMoment source, FirstMoment dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.m1 = source.m1; + dest.dev = source.dev; + dest.nDev = source.nDev; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FourthMoment.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FourthMoment.java new file mode 100644 index 0000000..0c199d8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/FourthMoment.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes a statistic related to the Fourth Central Moment. 
Specifically, + * what is computed is the sum of + * <p> + * (x_i - xbar) ^ 4, </p> + * <p> + * where the x_i are the + * sample observations and xbar is the sample mean. </p> + * <p> + * The following recursive updating formula is used: </p> + * <p> + * Let <ul> + * <li> dev = (current obs - previous mean) </li> + * <li> m2 = previous value of {@link SecondMoment} </li> + * <li> m3 = previous value of {@link ThirdMoment} </li> + * <li> n = number of observations (including current obs) </li> + * </ul> + * Then </p> + * <p> + * new value = old value - 4 * (dev/n) * m3 + 6 * (dev/n)^2 * m2 + <br> + * [n^2 - 3 * (n-1)] * dev^4 * (n-1) / n^3 </p> + * <p> + * Returns <code>Double.NaN</code> if no data values have been added and + * returns <code>0</code> if there is just one value in the data set. Note that + * Double.NaN may also be returned if the input includes NaN and / or infinite + * values. </p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally. 
</p> + * + */ +class FourthMoment extends ThirdMoment implements Serializable{ + + /** Serializable version identifier */ + private static final long serialVersionUID = 4763990447117157611L; + + /** fourth moment of values that have been added */ + private double m4; + + /** + * Create a FourthMoment instance + */ + FourthMoment() { + super(); + m4 = Double.NaN; + } + + /** + * Copy constructor, creates a new {@code FourthMoment} identical + * to the {@code original} + * + * @param original the {@code FourthMoment} instance to copy + * @throws NullArgumentException if original is null + */ + FourthMoment(FourthMoment original) throws NullArgumentException { + super(); + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + if (n < 1) { + m4 = 0.0; + m3 = 0.0; + m2 = 0.0; + m1 = 0.0; + } + + double prevM3 = m3; + double prevM2 = m2; + + super.increment(d); + + double n0 = n; + + m4 = m4 - 4.0 * nDev * prevM3 + 6.0 * nDevSq * prevM2 + + ((n0 * n0) - 3 * (n0 -1)) * (nDevSq * nDevSq * (n0 - 1) * n0); + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return m4; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + super.clear(); + m4 = Double.NaN; + } + + /** + * {@inheritDoc} + */ + @Override + public FourthMoment copy() { + FourthMoment result = new FourthMoment(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source FourthMoment to copy + * @param dest FourthMoment to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(FourthMoment source, FourthMoment dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + ThirdMoment.copy(source, dest); + dest.m4 = source.m4; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/GeometricMean.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/GeometricMean.java new file mode 100644 index 0000000..bfee9df --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/GeometricMean.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.summary.SumOfLogs; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> + * geometric mean </a> of the available values. + * <p> + * Uses a {@link SumOfLogs} instance to compute sum of logs and returns + * <code> exp( 1/n (sum of logs) ).</code> Therefore, </p> + * <ul> + * <li>If any of the values are &lt; 0, the result is <code>NaN.</code></li> + * <li>If all values are non-negative and less than + * <code>Double.POSITIVE_INFINITY</code>, but at least one value is 0, the + * result is <code>0.</code></li> + * <li>If both <code>Double.POSITIVE_INFINITY</code> and + * <code>Double.NEGATIVE_INFINITY</code> are among the values, the result is + * <code>NaN.</code></li> + * </ul> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + * + */ +public class GeometricMean extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -8178734905303459453L; + + /** Wrapped SumOfLogs instance */ + private StorelessUnivariateStatistic 
sumOfLogs; + + /** + * Create a GeometricMean instance + */ + public GeometricMean() { + sumOfLogs = new SumOfLogs(); + } + + /** + * Copy constructor, creates a new {@code GeometricMean} identical + * to the {@code original} + * + * @param original the {@code GeometricMean} instance to copy + * @throws NullArgumentException if original is null + */ + public GeometricMean(GeometricMean original) throws NullArgumentException { + super(); + copy(original, this); + } + + /** + * Create a GeometricMean instance using the given SumOfLogs instance. + * <p>The instance is stored by reference (it is not copied).</p> + * @param sumOfLogs sum of logs instance to use for computation + */ + public GeometricMean(SumOfLogs sumOfLogs) { + this.sumOfLogs = sumOfLogs; + } + + /** + * {@inheritDoc} + */ + @Override + public GeometricMean copy() { + GeometricMean result = new GeometricMean(); + // no try-catch or advertised exception because args guaranteed non-null + copy(this, result); + return result; + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + sumOfLogs.increment(d); + } + + /** + * {@inheritDoc} + * <p>Returns <code>Double.NaN</code> when no values have been added.</p> + */ + @Override + public double getResult() { + if (sumOfLogs.getN() > 0) { + return FastMath.exp(sumOfLogs.getResult() / sumOfLogs.getN()); + } else { + return Double.NaN; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + sumOfLogs.clear(); + } + + /** + * Returns the geometric mean of the entries in the specified portion + * of the input array. + * <p> + * See {@link GeometricMean} for details on the computing algorithm.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values input array containing the values + * @param begin first array element to include + * @param length the number of elements to include + * @return the geometric mean or Double.NaN if length = 0 or + * any of the values are &lt; 0 (per the class description, a zero value + * yields 0 rather than NaN). 
+ * @throws MathIllegalArgumentException if the input array is null or the array + * index parameters are not valid + */ + @Override + public double evaluate( + final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + return FastMath.exp( + sumOfLogs.evaluate(values, begin, length) / length); + } + + /** + * {@inheritDoc} + */ + public long getN() { + return sumOfLogs.getN(); + } + + /** + * <p>Sets the implementation for the sum of logs.</p> + * <p>This method must be activated before any data has been added - i.e., + * before {@link #increment(double) increment} has been used to add data; + * otherwise a MathIllegalStateException will be thrown.</p> + * + * @param sumLogImpl the StorelessUnivariateStatistic instance to use + * for computing the log sum + * @throws MathIllegalStateException if data has already been added + * (i.e. if n &gt; 0) + */ + public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) + throws MathIllegalStateException { + checkEmpty(); + this.sumOfLogs = sumLogImpl; + } + + /** + * Returns the currently configured sum of logs implementation + * + * @return the StorelessUnivariateStatistic implementing the log sum + */ + public StorelessUnivariateStatistic getSumLogImpl() { + return sumOfLogs; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source GeometricMean to copy + * @param dest GeometricMean to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(GeometricMean source, GeometricMean dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.sumOfLogs = source.sumOfLogs.copy(); + } + + + /** + * Throws MathIllegalStateException if n &gt; 0. 
+ * @throws MathIllegalStateException if data has been added to this statistic + */ + private void checkEmpty() throws MathIllegalStateException { + if (getN() > 0) { + throw new MathIllegalStateException( + LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, + getN()); + } + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Kurtosis.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Kurtosis.java new file mode 100644 index 0000000..be04fbe --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Kurtosis.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + + +/** + * Computes the Kurtosis of the available values. 
+ * <p> + * We use the following (unbiased) formula to define kurtosis:</p> + * <p> + * kurtosis = { [n(n+1) / (n -1)(n - 2)(n-3)] sum[(x_i - mean)^4] / std^4 } - [3(n-1)^2 / (n-2)(n-3)] + * </p><p> + * where n is the number of values, mean is the {@link Mean} and std is the + * {@link StandardDeviation}</p> + * <p> + * Note that this statistic is undefined for n < 4. <code>Double.NaN</code> + * is returned when there is not sufficient data to compute the statistic. + * Note that Double.NaN may also be returned if the input includes NaN + * and / or infinite values.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Kurtosis extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 2784465764798260919L; + + /**Fourth Moment on which this statistic is based */ + protected FourthMoment moment; + + /** + * Determines whether or not this statistic can be incremented or cleared.
+ * <p> + * Statistics based on (constructed from) external moments cannot + * be incremented or cleared.</p> + */ + protected boolean incMoment; + + /** + * Construct a Kurtosis + */ + public Kurtosis() { + incMoment = true; + moment = new FourthMoment(); + } + + /** + * Construct a Kurtosis from an external moment + * + * @param m4 external Moment + */ + public Kurtosis(final FourthMoment m4) { + incMoment = false; + this.moment = m4; + } + + /** + * Copy constructor, creates a new {@code Kurtosis} identical + * to the {@code original} + * + * @param original the {@code Kurtosis} instance to copy + * @throws NullArgumentException if original is null + */ + public Kurtosis(Kurtosis original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + * <p>Note that when {@link #Kurtosis(FourthMoment)} is used to + * create a Kurtosis, this method does nothing. In that case, the + * FourthMoment should be incremented directly.</p> + */ + @Override + public void increment(final double d) { + if (incMoment) { + moment.increment(d); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + double kurtosis = Double.NaN; + if (moment.getN() > 3) { + double variance = moment.m2 / (moment.n - 1); + if (moment.n <= 3 || variance < 10E-20) { + kurtosis = 0.0; + } else { + double n = moment.n; + kurtosis = + (n * (n + 1) * moment.getResult() - + 3 * moment.m2 * moment.m2 * (n - 1)) / + ((n - 1) * (n -2) * (n -3) * variance * variance); + } + } + return kurtosis; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + if (incMoment) { + moment.clear(); + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return moment.getN(); + } + + /* UnivariateStatistic Approach */ + + /** + * Returns the kurtosis of the entries in the specified portion of the + * input array.
+ * <p> + * See {@link Kurtosis} for details on the computing algorithm.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the kurtosis of the values or Double.NaN if length is less than 4 + * @throws MathIllegalArgumentException if the input array is null or the array + * index parameters are not valid + */ + @Override + public double evaluate(final double[] values,final int begin, final int length) + throws MathIllegalArgumentException { + // Initialize the kurtosis + double kurt = Double.NaN; + + if (test(values, begin, length) && length > 3) { + + // Compute the mean and standard deviation + Variance variance = new Variance(); + variance.incrementAll(values, begin, length); + double mean = variance.moment.m1; + double stdDev = FastMath.sqrt(variance.getResult()); + + // Sum the ^4 of the distance from the mean divided by the + // standard deviation + double accum3 = 0.0; + for (int i = begin; i < begin + length; i++) { + accum3 += FastMath.pow(values[i] - mean, 4.0); + } + accum3 /= FastMath.pow(stdDev, 4.0d); + + // Get N + double n0 = length; + + double coefficientOne = + (n0 * (n0 + 1)) / ((n0 - 1) * (n0 - 2) * (n0 - 3)); + double termTwo = + (3 * FastMath.pow(n0 - 1, 2.0)) / ((n0 - 2) * (n0 - 3)); + + // Calculate kurtosis + kurt = (coefficientOne * accum3) - termTwo; + } + return kurt; + } + + /** + * {@inheritDoc} + */ + @Override + public Kurtosis copy() { + Kurtosis result = new Kurtosis(); + // No try-catch because args are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source Kurtosis to copy + * @param dest Kurtosis to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Kurtosis source, Kurtosis dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.moment = source.moment.copy(); + dest.incMoment = source.incMoment; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Mean.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Mean.java new file mode 100644 index 0000000..aac3d78 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Mean.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.WeightedEvaluation; +import org.apache.commons.math3.stat.descriptive.summary.Sum; +import org.apache.commons.math3.util.MathUtils; + +/** + * <p>Computes the arithmetic mean of a set of values. Uses the definitional + * formula:</p> + * <p> + * mean = sum(x_i) / n + * </p> + * <p>where <code>n</code> is the number of observations. + * </p> + * <p>When {@link #increment(double)} is used to add data incrementally from a + * stream of (unstored) values, the value of the statistic that + * {@link #getResult()} returns is computed using the following recursive + * updating algorithm: </p> + * <ol> + * <li>Initialize <code>m = </code> the first value</li> + * <li>For each additional value, update using <br> + * <code>m = m + (new value - m) / (number of observations)</code></li> + * </ol> + * <p> If {@link #evaluate(double[])} is used to compute the mean of an array + * of stored values, a two-pass, corrected algorithm is used, starting with + * the definitional formula computed using the array of stored values and then + * correcting this by adding the mean deviation of the data values from the + * arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing + * Sample Means and Variances," Robert F. Ling, Journal of the American + * Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866. </p> + * <p> + * Returns <code>Double.NaN</code> if the dataset is empty. Note that + * Double.NaN may also be returned if the input includes NaN and / or infinite + * values. 
+ * </p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally. + * + */ +public class Mean extends AbstractStorelessUnivariateStatistic + implements Serializable, WeightedEvaluation { + + /** Serializable version identifier */ + private static final long serialVersionUID = -1296043746617791564L; + + /** First moment on which this statistic is based. */ + protected FirstMoment moment; + + /** + * Determines whether or not this statistic can be incremented or cleared. + * <p> + * Statistics based on (constructed from) external moments cannot + * be incremented or cleared.</p> + */ + protected boolean incMoment; + + /** Constructs a Mean. */ + public Mean() { + incMoment = true; + moment = new FirstMoment(); + } + + /** + * Constructs a Mean with an External Moment. + * + * @param m1 the moment + */ + public Mean(final FirstMoment m1) { + this.moment = m1; + incMoment = false; + } + + /** + * Copy constructor, creates a new {@code Mean} identical + * to the {@code original} + * + * @param original the {@code Mean} instance to copy + * @throws NullArgumentException if original is null + */ + public Mean(Mean original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + * <p>Note that when {@link #Mean(FirstMoment)} is used to + * create a Mean, this method does nothing. 
In that case, the + * FirstMoment should be incremented directly.</p> + */ + @Override + public void increment(final double d) { + if (incMoment) { + moment.increment(d); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + if (incMoment) { + moment.clear(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return moment.m1; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return moment.getN(); + } + + /** + * Returns the arithmetic mean of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * <p> + * See {@link Mean} for details on the computing algorithm.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the mean of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values,final int begin, final int length) + throws MathIllegalArgumentException { + if (test(values, begin, length)) { + Sum sum = new Sum(); + double sampleSize = length; + + // Compute initial estimate using definitional formula + double xbar = sum.evaluate(values, begin, length) / sampleSize; + + // Compute correction factor in second pass + double correction = 0; + for (int i = begin; i < begin + length; i++) { + correction += values[i] - xbar; + } + return xbar + (correction/sampleSize); + } + return Double.NaN; + } + + /** + * Returns the weighted arithmetic mean of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. 
+ * <p> + * Throws <code>IllegalArgumentException</code> if either array is null.</p> + * <p> + * See {@link Mean} for details on the computing algorithm. The two-pass algorithm + * described above is used here, with weights applied in computing both the original + * estimate and the correction factor.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the mean of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) throws MathIllegalArgumentException { + if (test(values, weights, begin, length)) { + Sum sum = new Sum(); + + // Compute initial estimate using definitional formula + double sumw = sum.evaluate(weights,begin,length); + double xbarw = sum.evaluate(values, weights, begin, length) / sumw; + + // Compute correction factor in second pass + double correction = 0; + for (int i = begin; i < begin + length; i++) { + correction += weights[i] * (values[i] - xbarw); + } + return xbarw + (correction/sumw); + } + return Double.NaN; + } + + /** + * Returns the weighted arithmetic mean of the entries in the input array. 
+ * <p> + * Throws <code>MathIllegalArgumentException</code> if either array is null.</p> + * <p> + * See {@link Mean} for details on the computing algorithm. The two-pass algorithm + * described above is used here, with weights applied in computing both the original + * estimate and the correction factor.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * + * @param values the input array + * @param weights the weights array + * @return the mean of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights) + throws MathIllegalArgumentException { + // Reject null here: values.length below would otherwise raise a NullPointerException + MathUtils.checkNotNull(values); + return evaluate(values, weights, 0, values.length); + } + + /** + * {@inheritDoc} + */ + @Override + public Mean copy() { + Mean result = new Mean(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + + /** + * Copies source to dest.
+ * <p>Neither source nor dest can be null.</p> + * + * @param source Mean to copy + * @param dest Mean to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Mean source, Mean dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.incMoment = source.incMoment; + dest.moment = source.moment.copy(); + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SecondMoment.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SecondMoment.java new file mode 100644 index 0000000..12715c0 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SecondMoment.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes a statistic related to the Second Central Moment. Specifically, + * what is computed is the sum of squared deviations from the sample mean. 
+ * <p> + * The following recursive updating formula is used:</p> + * <p> + * Let <ul> + * <li> dev = (current obs - previous mean) </li> + * <li> n = number of observations (including current obs) </li> + * </ul> + * Then</p> + * <p> + * new value = old value + dev^2 * (n -1) / n.</p> + * <p> + * Returns <code>Double.NaN</code> if no data values have been added and + * returns <code>0</code> if there is just one value in the data set. + * Note that Double.NaN may also be returned if the input includes NaN + * and / or infinite values.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class SecondMoment extends FirstMoment implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 3942403127395076445L; + + /** second moment of values that have been added */ + protected double m2; + + /** + * Create a SecondMoment instance + */ + public SecondMoment() { + super(); + m2 = Double.NaN; + } + + /** + * Copy constructor, creates a new {@code SecondMoment} identical + * to the {@code original} + * + * @param original the {@code SecondMoment} instance to copy + * @throws NullArgumentException if original is null + */ + public SecondMoment(SecondMoment original) + throws NullArgumentException { + super(original); + this.m2 = original.m2; + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + if (n < 1) { + m1 = m2 = 0.0; + } + super.increment(d); + m2 += ((double) n - 1) * dev * nDev; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + super.clear(); + m2 = Double.NaN; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return m2; + } + + /** + * {@inheritDoc} + */ + 
@Override + public SecondMoment copy() { + SecondMoment result = new SecondMoment(); + // no try-catch or advertised NAE because args are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source SecondMoment to copy + * @param dest SecondMoment to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SecondMoment source, SecondMoment dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + FirstMoment.copy(source, dest); + dest.m2 = source.m2; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SemiVariance.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SemiVariance.java new file mode 100644 index 0000000..563119a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/SemiVariance.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * <p>Computes the semivariance of a set of values with respect to a given cutoff value. + * We define the <i>downside semivariance</i> of a set of values <code>x</code> + * against the <i>cutoff value</i> <code>cutoff</code> to be <br/> + * <code>Σ (x[i] - target)<sup>2</sup> / df</code> <br/> + * where the sum is taken over all <code>i</code> such that <code>x[i] < cutoff</code> + * and <code>df</code> is the length of <code>x</code> (non-bias-corrected) or + * one less than this number (bias corrected). The <i>upside semivariance</i> + * is defined similarly, with the sum taken over values of <code>x</code> that + * exceed the cutoff value.</p> + * + * <p>The cutoff value defaults to the mean, bias correction defaults to <code>true</code> + * and the "variance direction" (upside or downside) defaults to downside. 
The variance direction + * and bias correction may be set using property setters or their values can be provided as + * parameters to {@link #evaluate(double[], double, Direction, boolean, int, int)}.</p> + * + * <p>If the input array is null, <code>evaluate</code> methods throw + * <code>IllegalArgumentException.</code> If the array has length 1, <code>0</code> + * is returned, regardless of the value of the <code>cutoff.</code></p> + * + * <p><strong>Note that this class is not intended to be threadsafe.</strong> If + * multiple threads access an instance of this class concurrently, and one or + * more of these threads invoke property setters, external synchronization must + * be provided to ensure correct results.</p> + * + * @since 2.1 + */ +public class SemiVariance extends AbstractUnivariateStatistic implements Serializable { + + /** + * The UPSIDE Direction is used to specify that the observations above the + * cutoff point will be used to calculate SemiVariance. + */ + public static final Direction UPSIDE_VARIANCE = Direction.UPSIDE; + + /** + * The DOWNSIDE Direction is used to specify that the observations below + * the cutoff point will be used to calculate SemiVariance + */ + public static final Direction DOWNSIDE_VARIANCE = Direction.DOWNSIDE; + + /** Serializable version identifier */ + private static final long serialVersionUID = -2653430366886024994L; + + /** + * Determines whether or not bias correction is applied when computing the + * value of the statistic. True means that bias is corrected. + */ + private boolean biasCorrected = true; + + /** + * Determines whether to calculate downside or upside SemiVariance. + */ + private Direction varianceDirection = Direction.DOWNSIDE; + + /** + * Constructs a SemiVariance with default (true) <code>biasCorrected</code> + * property and default (Downside) <code>varianceDirection</code> property.
+ */ + public SemiVariance() { + } + + /** + * Constructs a SemiVariance with the specified <code>biasCorrected</code> + * property and default (Downside) <code>varianceDirection</code> property. + * + * @param biasCorrected setting for bias correction - true means + * bias will be corrected and is equivalent to using the argumentless + * constructor + */ + public SemiVariance(final boolean biasCorrected) { + this.biasCorrected = biasCorrected; + } + + + /** + * Constructs a SemiVariance with the specified <code>Direction</code> property + * and default (true) <code>biasCorrected</code> property + * + * @param direction setting for the direction of the SemiVariance + * to calculate + */ + public SemiVariance(final Direction direction) { + this.varianceDirection = direction; + } + + + /** + * Constructs a SemiVariance with the specified <code>isBiasCorrected</code> + * property and the specified <code>Direction</code> property. + * + * @param corrected setting for bias correction - true means + * bias will be corrected and is equivalent to using the argumentless + * constructor + * + * @param direction setting for the direction of the SemiVariance + * to calculate + */ + public SemiVariance(final boolean corrected, final Direction direction) { + this.biasCorrected = corrected; + this.varianceDirection = direction; + } + + + /** + * Copy constructor, creates a new {@code SemiVariance} identical + * to the {@code original} + * + * @param original the {@code SemiVariance} instance to copy + * @throws NullArgumentException if original is null + */ + public SemiVariance(final SemiVariance original) throws NullArgumentException { + copy(original, this); + } + + + /** + * {@inheritDoc} + */ + @Override + public SemiVariance copy() { + SemiVariance result = new SemiVariance(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source SemiVariance to copy + * @param dest SemiVariance to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(final SemiVariance source, SemiVariance dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.biasCorrected = source.biasCorrected; + dest.varianceDirection = source.varianceDirection; + } + + /** + * <p>Returns the {@link SemiVariance} of the designated values against the mean, using + * instance properties varianceDirection and biasCorrection.</p> + * + * <p>Returns <code>NaN</code> if the array is empty and throws + * <code>IllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param start index of the first array element to include + * @param length the number of elements to include + * @return the SemiVariance + * @throws MathIllegalArgumentException if the parameters are not valid + * + */ + @Override + public double evaluate(final double[] values, final int start, final int length) + throws MathIllegalArgumentException { + double m = (new Mean()).evaluate(values, start, length); + return evaluate(values, m, varianceDirection, biasCorrected, start, length); + } + + + /** + * This method calculates {@link SemiVariance} for the entire array against the mean, using + * the current value of the biasCorrection instance property.
+ * + * @param values the input array + * @param direction the {@link Direction} of the semivariance + * @return the SemiVariance + * @throws MathIllegalArgumentException if values is null + * + */ + public double evaluate(final double[] values, Direction direction) + throws MathIllegalArgumentException { + double m = (new Mean()).evaluate(values); + return evaluate (values, m, direction, biasCorrected, 0, values.length); + } + + /** + * <p>Returns the {@link SemiVariance} of the designated values against the cutoff, using + * instance properties varianceDirection and biasCorrection.</p> + * + * <p>Returns <code>NaN</code> if the array is empty and throws + * <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param cutoff the reference point + * @return the SemiVariance + * @throws MathIllegalArgumentException if values is null + */ + public double evaluate(final double[] values, final double cutoff) + throws MathIllegalArgumentException { + // Reject null here: values.length below would otherwise raise a NullPointerException + MathUtils.checkNotNull(values); + return evaluate(values, cutoff, varianceDirection, biasCorrected, 0, values.length); + } + + /** + * <p>Returns the {@link SemiVariance} of the designated values against the cutoff in the + * given direction, using the current value of the biasCorrection instance property.</p> + * + * <p>Returns <code>NaN</code> if the array is empty and throws + * <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param cutoff the reference point + * @param direction the {@link Direction} of the semivariance + * @return the SemiVariance + * @throws MathIllegalArgumentException if values is null + */ + public double evaluate(final double[] values, final double cutoff, final Direction direction) + throws MathIllegalArgumentException { + // Reject null here: values.length below would otherwise raise a NullPointerException + MathUtils.checkNotNull(values); + return evaluate(values, cutoff, direction, biasCorrected, 0, values.length); + } + + + /** + * <p>Returns the {@link SemiVariance} of the designated values against the cutoff + * in the given
direction with the provided bias correction.</p> + * + * <p>Returns <code>NaN</code> if the designated range is empty, <code>0</code> if it holds a single value, and throws + * <code>IllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param cutoff the reference point + * @param direction the {@link Direction} of the semivariance + * @param corrected the BiasCorrection flag + * @param start index of the first array element to include + * @param length the number of elements to include + * @return the SemiVariance + * @throws MathIllegalArgumentException if the parameters are not valid + * + */ + public double evaluate (final double[] values, final double cutoff, final Direction direction, + final boolean corrected, final int start, final int length) throws MathIllegalArgumentException { + + test(values, start, length); + if (length == 0) { + return Double.NaN; + } else { + if (length == 1) { + return 0.0; + } else { + final boolean booleanDirection = direction.getDirection(); + + double dev = 0.0; + double sumsq = 0.0; + for (int i = start; i < start + length; i++) { + if ((values[i] > cutoff) == booleanDirection) { + dev = values[i] - cutoff; + sumsq += dev * dev; + } + } + + if (corrected) { + return sumsq / (length - 1.0); + } else { + return sumsq / length; + } + } + } + } + + /** + * Returns true iff biasCorrected property is set to true. + * + * @return the value of biasCorrected. + */ + public boolean isBiasCorrected() { + return biasCorrected; + } + + /** + * Sets the biasCorrected property. + * + * @param biasCorrected new biasCorrected property value + */ + public void setBiasCorrected(boolean biasCorrected) { + this.biasCorrected = biasCorrected; + } + + /** + * Returns the varianceDirection property.
+ * + * @return the varianceDirection + */ + public Direction getVarianceDirection () { + return varianceDirection; + } + + /** + * Sets the variance direction + * + * @param varianceDirection the direction of the semivariance + */ + public void setVarianceDirection(Direction varianceDirection) { + this.varianceDirection = varianceDirection; + } + + /** + * The direction of the semivariance - either upside or downside. The direction + * is represented by boolean, with true corresponding to UPSIDE semivariance. + */ + public enum Direction { + /** + * The UPSIDE Direction is used to specify that the observations above the + * cutoff point will be used to calculate SemiVariance + */ + UPSIDE (true), + + /** + * The DOWNSIDE Direction is used to specify that the observations below + * the cutoff point will be used to calculate SemiVariance + */ + DOWNSIDE (false); + + /** + * boolean value UPSIDE <-> true + */ + private boolean direction; + + /** + * Create a Direction with the given value. + * + * @param b boolean value representing the Direction. True corresponds to UPSIDE. + */ + Direction (boolean b) { + direction = b; + } + + /** + * Returns the value of this Direction. True corresponds to UPSIDE. + * + * @return true if direction is UPSIDE; false otherwise + */ + boolean getDirection () { + return direction; + } + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Skewness.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Skewness.java new file mode 100644 index 0000000..b4703eb --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Skewness.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes the skewness of the available values. + * <p> + * We use the following (unbiased) formula to define skewness:</p> + * <p> + * skewness = [n / (n -1) (n - 2)] sum[(x_i - mean)^3] / std^3 </p> + * <p> + * where n is the number of values, mean is the {@link Mean} and std is the + * {@link StandardDeviation} </p> + * <p> + * Note that this statistic is undefined for n < 3. <code>Double.Nan</code> + * is returned when there is not sufficient data to compute the statistic. + * Double.NaN may also be returned if the input includes NaN and / or + * infinite values.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally. 
</p> + * + */ +public class Skewness extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 7101857578996691352L; + + /** Third moment on which this statistic is based */ + protected ThirdMoment moment = null; + + /** + * Determines whether or not this statistic can be incremented or cleared. + * <p> + * Statistics based on (constructed from) external moments cannot + * be incremented or cleared.</p> + */ + protected boolean incMoment; + + /** + * Constructs a Skewness + */ + public Skewness() { + incMoment = true; + moment = new ThirdMoment(); + } + + /** + * Constructs a Skewness with an external moment + * @param m3 external moment + */ + public Skewness(final ThirdMoment m3) { + incMoment = false; + this.moment = m3; + } + + /** + * Copy constructor, creates a new {@code Skewness} identical + * to the {@code original} + * + * @param original the {@code Skewness} instance to copy + * @throws NullArgumentException if original is null + */ + public Skewness(Skewness original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + * <p>Note that when {@link #Skewness(ThirdMoment)} is used to + * create a Skewness, this method does nothing. In that case, the + * ThirdMoment should be incremented directly.</p> + */ + @Override + public void increment(final double d) { + if (incMoment) { + moment.increment(d); + } + } + + /** + * Returns the value of the statistic based on the values that have been added. + * <p> + * See {@link Skewness} for the definition used in the computation.</p> + * + * @return the skewness of the available values. 
+ */ + @Override + public double getResult() { + + if (moment.n < 3) { + return Double.NaN; + } + double variance = moment.m2 / (moment.n - 1); + if (variance < 10E-20) { + return 0.0d; + } else { + double n0 = moment.getN(); + return (n0 * moment.m3) / + ((n0 - 1) * (n0 -2) * FastMath.sqrt(variance) * variance); + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return moment.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + if (incMoment) { + moment.clear(); + } + } + + /** + * Returns the Skewness of the entries in the specifed portion of the + * input array. + * <p> + * See {@link Skewness} for the definition used in the computation.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin the index of the first array element to include + * @param length the number of elements to include + * @return the skewness of the values or Double.NaN if length is less than + * 3 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values,final int begin, + final int length) throws MathIllegalArgumentException { + + // Initialize the skewness + double skew = Double.NaN; + + if (test(values, begin, length) && length > 2 ){ + Mean mean = new Mean(); + // Get the mean and the standard deviation + double m = mean.evaluate(values, begin, length); + + // Calc the std, this is implemented here instead + // of using the standardDeviation method eliminate + // a duplicate pass to get the mean + double accum = 0.0; + double accum2 = 0.0; + for (int i = begin; i < begin + length; i++) { + final double d = values[i] - m; + accum += d * d; + accum2 += d; + } + final double variance = (accum - (accum2 * accum2 / length)) / (length - 1); + + double accum3 = 0.0; + for (int i = begin; i < begin + length; i++) { + final double d = values[i] - m; + 
accum3 += d * d * d; + } + accum3 /= variance * FastMath.sqrt(variance); + + // Get N + double n0 = length; + + // Calculate skewness + skew = (n0 / ((n0 - 1) * (n0 - 2))) * accum3; + } + return skew; + } + + /** + * {@inheritDoc} + */ + @Override + public Skewness copy() { + Skewness result = new Skewness(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source Skewness to copy + * @param dest Skewness to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Skewness source, Skewness dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.moment = new ThirdMoment(source.moment.copy()); + dest.incMoment = source.incMoment; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/StandardDeviation.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/StandardDeviation.java new file mode 100644 index 0000000..a6248c5 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/StandardDeviation.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes the sample standard deviation. The standard deviation + * is the positive square root of the variance. This implementation wraps a + * {@link Variance} instance. The <code>isBiasCorrected</code> property of the + * wrapped Variance instance is exposed, so that this class can be used to + * compute both the "sample standard deviation" (the square root of the + * bias-corrected "sample variance") or the "population standard deviation" + * (the square root of the non-bias-corrected "population variance"). See + * {@link Variance} for more information. 
+ * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class StandardDeviation extends AbstractStorelessUnivariateStatistic + implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 5728716329662425188L; + + /** Wrapped Variance instance */ + private Variance variance = null; + + /** + * Constructs a StandardDeviation. Sets the underlying {@link Variance} + * instance's <code>isBiasCorrected</code> property to true. + */ + public StandardDeviation() { + variance = new Variance(); + } + + /** + * Constructs a StandardDeviation from an external second moment. + * + * @param m2 the external moment + */ + public StandardDeviation(final SecondMoment m2) { + variance = new Variance(m2); + } + + /** + * Copy constructor, creates a new {@code StandardDeviation} identical + * to the {@code original} + * + * @param original the {@code StandardDeviation} instance to copy + * @throws NullArgumentException if original is null + */ + public StandardDeviation(StandardDeviation original) throws NullArgumentException { + copy(original, this); + } + + /** + * Contructs a StandardDeviation with the specified value for the + * <code>isBiasCorrected</code> property. If this property is set to + * <code>true</code>, the {@link Variance} used in computing results will + * use the bias-corrected, or "sample" formula. See {@link Variance} for + * details. 
+ * + * @param isBiasCorrected whether or not the variance computation will use + * the bias-corrected formula + */ + public StandardDeviation(boolean isBiasCorrected) { + variance = new Variance(isBiasCorrected); + } + + /** + * Contructs a StandardDeviation with the specified value for the + * <code>isBiasCorrected</code> property and the supplied external moment. + * If <code>isBiasCorrected</code> is set to <code>true</code>, the + * {@link Variance} used in computing results will use the bias-corrected, + * or "sample" formula. See {@link Variance} for details. + * + * @param isBiasCorrected whether or not the variance computation will use + * the bias-corrected formula + * @param m2 the external moment + */ + public StandardDeviation(boolean isBiasCorrected, SecondMoment m2) { + variance = new Variance(isBiasCorrected, m2); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + variance.increment(d); + } + + /** + * {@inheritDoc} + */ + public long getN() { + return variance.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return FastMath.sqrt(variance.getResult()); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + variance.clear(); + } + + /** + * Returns the Standard Deviation of the entries in the input array, or + * <code>Double.NaN</code> if the array is empty. + * <p> + * Returns 0 for a single-value (i.e. 
length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @return the standard deviation of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null + */ + @Override + public double evaluate(final double[] values) throws MathIllegalArgumentException { + return FastMath.sqrt(variance.evaluate(values)); + } + + /** + * Returns the Standard Deviation of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample. </p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the standard deviation of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + return FastMath.sqrt(variance.evaluate(values, begin, length)); + } + + /** + * Returns the Standard Deviation of the entries in the specified portion of + * the input array, using the precomputed mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * The formula used assumes that the supplied mean value is the arithmetic + * mean of the sample data, not a known population parameter. 
This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the standard deviation of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + public double evaluate(final double[] values, final double mean, + final int begin, final int length) throws MathIllegalArgumentException { + return FastMath.sqrt(variance.evaluate(values, mean, begin, length)); + } + + /** + * Returns the Standard Deviation of the entries in the input array, using + * the precomputed mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * The formula used assumes that the supplied mean value is the arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @return the standard deviation of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null + */ + public double evaluate(final double[] values, final double mean) + throws MathIllegalArgumentException { + return FastMath.sqrt(variance.evaluate(values, mean)); + } + + /** + * @return Returns the isBiasCorrected. 
+ */ + public boolean isBiasCorrected() { + return variance.isBiasCorrected(); + } + + /** + * @param isBiasCorrected The isBiasCorrected to set. + */ + public void setBiasCorrected(boolean isBiasCorrected) { + variance.setBiasCorrected(isBiasCorrected); + } + + /** + * {@inheritDoc} + */ + @Override + public StandardDeviation copy() { + StandardDeviation result = new StandardDeviation(); + // No try-catch or advertised exception because args are guaranteed non-null + copy(this, result); + return result; + } + + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source StandardDeviation to copy + * @param dest StandardDeviation to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(StandardDeviation source, StandardDeviation dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.variance = source.variance.copy(); + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/ThirdMoment.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/ThirdMoment.java new file mode 100644 index 0000000..43a9ca1 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/ThirdMoment.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.util.MathUtils; + + +/** + * Computes a statistic related to the Third Central Moment. Specifically, + * what is computed is the sum of cubed deviations from the sample mean. + * <p> + * The following recursive updating formula is used:</p> + * <p> + * Let <ul> + * <li> dev = (current obs - previous mean) </li> + * <li> m2 = previous value of {@link SecondMoment} </li> + * <li> n = number of observations (including current obs) </li> + * </ul> + * Then</p> + * <p> + * new value = old value - 3 * (dev/n) * m2 + (n-1) * (n -2) * (dev^3/n^2)</p> + * <p> + * Returns <code>Double.NaN</code> if no data values have been added and + * returns <code>0</code> if there is just one value in the data set. 
+ * Note that Double.NaN may also be returned if the input includes NaN
+ * and / or infinite values.</p>
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the <code>increment()</code> or
+ * <code>clear()</code> method, it must be synchronized externally.</p>
+ *
+ */
+class ThirdMoment extends SecondMoment implements Serializable {
+
+    /** Serializable version identifier */
+    private static final long serialVersionUID = -7818711964045118679L;
+
+    /** third moment of values that have been added */
+    protected double m3;
+
+    /**
+     * Square of deviation of most recently added value from previous first
+     * moment, normalized by previous sample size. Retained to prevent
+     * repeated computation in higher order moments. nDevSq = nDev * nDev.
+     */
+    protected double nDevSq;
+
+    /**
+     * Create a ThirdMoment instance. <code>m3</code> and <code>nDevSq</code>
+     * start out as <code>Double.NaN</code> until a value is added.
+     */
+    ThirdMoment() {
+        super();
+        m3 = Double.NaN;
+        nDevSq = Double.NaN;
+    }
+
+    /**
+     * Copy constructor, creates a new {@code ThirdMoment} identical
+     * to the {@code original}
+     *
+     * @param original the {@code ThirdMoment} instance to copy
+     * @throws NullArgumentException if original is null
+     */
+    ThirdMoment(ThirdMoment original) throws NullArgumentException {
+        copy(original, this);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void increment(final double d) {
+        // Before the first value is folded in, replace the placeholder
+        // moments (m3 is NaN after construction or clear()) with zeros.
+        if (n < 1) {
+            m3 = m2 = m1 = 0.0;
+        }
+
+        // Remember m2 as it was before this value is added; the m3 update
+        // below is written in terms of that previous value.
+        double prevM2 = m2;
+        super.increment(d);
+        nDevSq = nDev * nDev;
+        // Use a double copy of n so the (n - 1) * (n - 2) product is done
+        // in floating point.
+        double n0 = n;
+        m3 = m3 - 3.0 * nDev * prevM2 + (n0 - 1) * (n0 - 2) * nDevSq * dev;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public double getResult() {
+        return m3;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public void clear() {
+        super.clear();
+        m3 = Double.NaN;
+        nDevSq = Double.NaN;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public ThirdMoment copy() {
+        ThirdMoment result = new ThirdMoment();
+        // No try-catch or advertised exception because args are guaranteed non-null
+        copy(this, result);
+        return result;
+    }
+
+    /**
+     * Copies source to dest.
+     * <p>Neither source nor dest can be null.</p>
+     *
+     * @param source ThirdMoment to copy
+     * @param dest ThirdMoment to copy to
+     * @throws NullArgumentException if either source or dest is null
+     */
+    public static void copy(ThirdMoment source, ThirdMoment dest)
+        throws NullArgumentException {
+        MathUtils.checkNotNull(source);
+        MathUtils.checkNotNull(dest);
+        SecondMoment.copy(source, dest);
+        dest.m3 = source.m3;
+        dest.nDevSq = source.nDevSq;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Variance.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Variance.java
new file mode 100644
index 0000000..1ba48e9
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/Variance.java
@@ -0,0 +1,627 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.WeightedEvaluation; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * Computes the variance of the available values. By default, the unbiased + * "sample variance" definitional formula is used: + * <p> + * variance = sum((x_i - mean)^2) / (n - 1) </p> + * <p> + * where mean is the {@link Mean} and <code>n</code> is the number + * of sample observations.</p> + * <p> + * The definitional formula does not have good numerical properties, so + * this implementation does not compute the statistic using the definitional + * formula. <ul> + * <li> The <code>getResult</code> method computes the variance using + * updating formulas based on West's algorithm, as described in + * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and + * J. G. Lewis 1979, <i>Communications of the ACM</i>, + * vol. 22 no. 9, pp. 526-531.</a></li> + * <li> The <code>evaluate</code> methods leverage the fact that they have the + * full array of values in memory to execute a two-pass algorithm. + * Specifically, these methods use the "corrected two-pass algorithm" from + * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, + * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul> + * Note that adding values using <code>increment</code> or + * <code>incrementAll</code> and then executing <code>getResult</code> will + * sometimes give a different, less accurate, result than executing + * <code>evaluate</code> with the full array of values. 
The former approach + * should only be used when the full array of values is not available.</p> + * <p> + * The "population variance" ( sum((x_i - mean)^2) / n ) can also + * be computed using this statistic. The <code>isBiasCorrected</code> + * property determines whether the "population" or "sample" value is + * returned by the <code>evaluate</code> and <code>getResult</code> methods. + * To compute population variances, set this property to <code>false.</code> + * </p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation { + + /** Serializable version identifier */ + private static final long serialVersionUID = -9111962718267217978L; + + /** SecondMoment is used in incremental calculation of Variance*/ + protected SecondMoment moment = null; + + /** + * Whether or not {@link #increment(double)} should increment + * the internal second moment. When a Variance is constructed with an + * external SecondMoment as a constructor parameter, this property is + * set to false and increments must be applied to the second moment + * directly. + */ + protected boolean incMoment = true; + + /** + * Whether or not bias correction is applied when computing the + * value of the statistic. True means that bias is corrected. See + * {@link Variance} for details on the formula. + */ + private boolean isBiasCorrected = true; + + /** + * Constructs a Variance with default (true) <code>isBiasCorrected</code> + * property. + */ + public Variance() { + moment = new SecondMoment(); + } + + /** + * Constructs a Variance based on an external second moment. 
+ * When this constructor is used, the statistic may only be + * incremented via the moment, i.e., {@link #increment(double)} + * does nothing; whereas {@code m2.increment(value)} increments + * both {@code m2} and the Variance instance constructed from it. + * + * @param m2 the SecondMoment (Third or Fourth moments work + * here as well.) + */ + public Variance(final SecondMoment m2) { + incMoment = false; + this.moment = m2; + } + + /** + * Constructs a Variance with the specified <code>isBiasCorrected</code> + * property + * + * @param isBiasCorrected setting for bias correction - true means + * bias will be corrected and is equivalent to using the argumentless + * constructor + */ + public Variance(boolean isBiasCorrected) { + moment = new SecondMoment(); + this.isBiasCorrected = isBiasCorrected; + } + + /** + * Constructs a Variance with the specified <code>isBiasCorrected</code> + * property and the supplied external second moment. + * + * @param isBiasCorrected setting for bias correction - true means + * bias will be corrected + * @param m2 the SecondMoment (Third or Fourth moments work + * here as well.) + */ + public Variance(boolean isBiasCorrected, SecondMoment m2) { + incMoment = false; + this.moment = m2; + this.isBiasCorrected = isBiasCorrected; + } + + /** + * Copy constructor, creates a new {@code Variance} identical + * to the {@code original} + * + * @param original the {@code Variance} instance to copy + * @throws NullArgumentException if original is null + */ + public Variance(Variance original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + * <p>If all values are available, it is more accurate to use + * {@link #evaluate(double[])} rather than adding values one at a time + * using this method and then executing {@link #getResult}, since + * <code>evaluate</code> leverages the fact that is has the full + * list of values together to execute a two-pass algorithm. 
+ * See {@link Variance}.</p> + * + * <p>Note also that when {@link #Variance(SecondMoment)} is used to + * create a Variance, this method does nothing. In that case, the + * SecondMoment should be incremented directly.</p> + */ + @Override + public void increment(final double d) { + if (incMoment) { + moment.increment(d); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + if (moment.n == 0) { + return Double.NaN; + } else if (moment.n == 1) { + return 0d; + } else { + if (isBiasCorrected) { + return moment.m2 / (moment.n - 1d); + } else { + return moment.m2 / (moment.n); + } + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return moment.getN(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + if (incMoment) { + moment.clear(); + } + } + + /** + * Returns the variance of the entries in the input array, or + * <code>Double.NaN</code> if the array is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @return the variance of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null + */ + @Override + public double evaluate(final double[] values) throws MathIllegalArgumentException { + if (values == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + return evaluate(values, 0, values.length); + } + + /** + * Returns the variance of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. Note that Double.NaN may also be returned if the input + * includes NaN and / or infinite values. 
+     * <p>
+     * See {@link Variance} for details on the computing algorithm.</p>
+     * <p>
+     * Returns 0 for a single-value (i.e. length = 1) sample.</p>
+     * <p>
+     * Does not change the internal state of the statistic.</p>
+     * <p>
+     * Throws <code>MathIllegalArgumentException</code> if the array is null.</p>
+     *
+     * @param values the input array
+     * @param begin index of the first array element to include
+     * @param length the number of elements to include
+     * @return the variance of the values or Double.NaN if length = 0
+     * @throws MathIllegalArgumentException if the array is null or the array index
+     *  parameters are not valid
+     */
+    @Override
+    public double evaluate(final double[] values, final int begin, final int length)
+        throws MathIllegalArgumentException {
+
+        double var = Double.NaN;
+
+        // No clear() here: the result is computed from the array alone, and
+        // this method is documented as leaving the incremental state untouched.
+        if (test(values, begin, length)) {
+            if (length == 1) {
+                var = 0.0;
+            } else if (length > 1) {
+                // Two-pass computation: first the mean, then the variance about it.
+                Mean mean = new Mean();
+                double m = mean.evaluate(values, begin, length);
+                var = evaluate(values, m, begin, length);
+            }
+        }
+        return var;
+    }
+
+    /**
+     * <p>Returns the weighted variance of the entries in the specified portion of
+     * the input array, or <code>Double.NaN</code> if the designated subarray
+     * is empty.</p>
+     * <p>
+     * Uses the formula <pre>
+     *   Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1)
+     * </pre>
+     * where weightedMean is the weighted mean</p>
+     * <p>
+     * This formula will not return the same result as the unweighted variance when all
+     * weights are equal, unless all weights are equal to 1. The formula assumes that
+     * weights are to be treated as "expansion values," as will be the case if for example
+     * the weights represent frequency counts. To normalize weights so that the denominator
+     * in the variance computation equals the length of the input vector minus one, use <pre>
+     *   <code>evaluate(values, MathArrays.normalizeArray(weights, values.length)); </code>
+     * </pre>
+     * <p>
+     * Returns 0 for a single-value (i.e.
length = 1) sample.</p> + * <p> + * Throws <code>IllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if either array is null.</p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the weighted variance of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) throws MathIllegalArgumentException { + + double var = Double.NaN; + + if (test(values, weights,begin, length)) { + clear(); + if (length == 1) { + var = 0.0; + } else if (length > 1) { + Mean mean = new Mean(); + double m = mean.evaluate(values, weights, begin, length); + var = evaluate(values, weights, m, begin, length); + } + } + return var; + } + + /** + * <p> + * Returns the weighted variance of the entries in the the input array.</p> + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre> + * where weightedMean is the weighted mean</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. 
The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length)); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if either array is null.</p> + * + * @param values the input array + * @param weights the weights array + * @return the weighted variance of the values + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights) + throws MathIllegalArgumentException { + return evaluate(values, weights, 0, values.length); + } + + /** + * Returns the variance of the entries in the specified portion of + * the input array, using the precomputed mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * The formula used assumes that the supplied mean value is the arithmetic + * mean of the sample data, not a known population parameter. 
This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + public double evaluate(final double[] values, final double mean, + final int begin, final int length) throws MathIllegalArgumentException { + + double var = Double.NaN; + + if (test(values, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + double accum = 0.0; + double dev = 0.0; + double accum2 = 0.0; + for (int i = begin; i < begin + length; i++) { + dev = values[i] - mean; + accum += dev * dev; + accum2 += dev; + } + double len = length; + if (isBiasCorrected) { + var = (accum - (accum2 * accum2 / len)) / (len - 1.0); + } else { + var = (accum - (accum2 * accum2 / len)) / len; + } + } + } + return var; + } + + /** + * Returns the variance of the entries in the input array, using the + * precomputed mean value. Returns <code>Double.NaN</code> if the array + * is empty. + * <p> + * See {@link Variance} for details on the computing algorithm.</p> + * <p> + * If <code>isBiasCorrected</code> is <code>true</code> the formula used + * assumes that the supplied mean value is the arithmetic mean of the + * sample data, not a known population parameter. 
If the mean is a known + * population parameter, or if the "population" version of the variance is + * desired, set <code>isBiasCorrected</code> to <code>false</code> before + * invoking this method.</p> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param mean the precomputed mean value + * @return the variance of the values or Double.NaN if the array is empty + * @throws MathIllegalArgumentException if the array is null + */ + public double evaluate(final double[] values, final double mean) throws MathIllegalArgumentException { + return evaluate(values, mean, 0, values.length); + } + + /** + * Returns the weighted variance of the entries in the specified portion of + * the input array, using the precomputed weighted mean value. Returns + * <code>Double.NaN</code> if the designated subarray is empty. + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre></p> + * <p> + * The formula used assumes that the supplied mean value is the weighted arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. 
To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param weights the weights array + * @param mean the precomputed weighted mean value + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the variance of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final double mean, final int begin, final int length) + throws MathIllegalArgumentException { + + double var = Double.NaN; + + if (test(values, weights, begin, length)) { + if (length == 1) { + var = 0.0; + } else if (length > 1) { + double accum = 0.0; + double dev = 0.0; + double accum2 = 0.0; + for (int i = begin; i < begin + length; i++) { + dev = values[i] - mean; + accum += weights[i] * (dev * dev); + accum2 += weights[i] * dev; + } + + double sumWts = 0; + for (int i = begin; i < begin + length; i++) { + sumWts += weights[i]; + } + + if (isBiasCorrected) { + var = (accum - (accum2 * accum2 / 
sumWts)) / (sumWts - 1.0); + } else { + var = (accum - (accum2 * accum2 / sumWts)) / sumWts; + } + } + } + return var; + } + + /** + * <p>Returns the weighted variance of the values in the input array, using + * the precomputed weighted mean value.</p> + * <p> + * Uses the formula <pre> + * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) + * </pre></p> + * <p> + * The formula used assumes that the supplied mean value is the weighted arithmetic + * mean of the sample data, not a known population parameter. This method + * is supplied only to save computation when the mean has already been + * computed.</p> + * <p> + * This formula will not return the same result as the unweighted variance when all + * weights are equal, unless all weights are equal to 1. The formula assumes that + * weights are to be treated as "expansion values," as will be the case if for example + * the weights represent frequency counts. To normalize weights so that the denominator + * in the variance computation equals the length of the input vector minus one, use <pre> + * <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean); </code> + * </pre> + * <p> + * Returns 0 for a single-value (i.e. 
length = 1) sample.</p> + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * <p> + * Does not change the internal state of the statistic.</p> + * + * @param values the input array + * @param weights the weights array + * @param mean the precomputed weighted mean value + * @return the variance of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, final double mean) + throws MathIllegalArgumentException { + return evaluate(values, weights, mean, 0, values.length); + } + + /** + * @return Returns the isBiasCorrected. + */ + public boolean isBiasCorrected() { + return isBiasCorrected; + } + + /** + * @param biasCorrected The isBiasCorrected to set. + */ + public void setBiasCorrected(boolean biasCorrected) { + this.isBiasCorrected = biasCorrected; + } + + /** + * {@inheritDoc} + */ + @Override + public Variance copy() { + Variance result = new Variance(); + // No try-catch or advertised exception because parameters are guaranteed non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source Variance to copy + * @param dest Variance to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Variance source, Variance dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.moment = source.moment.copy(); + dest.isBiasCorrected = source.isBiasCorrected; + dest.incMoment = source.incMoment; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialCovariance.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialCovariance.java new file mode 100644 index 0000000..7f6f903 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialCovariance.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; +import java.util.Arrays; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.RealMatrix; + +/** + * Returns the covariance matrix of the available vectors. + * @since 1.2 + */ +public class VectorialCovariance implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 4118372414238930270L; + + /** Sums for each component. */ + private final double[] sums; + + /** Sums of products for each component. */ + private final double[] productsSums; + + /** Indicator for bias correction. */ + private final boolean isBiasCorrected; + + /** Number of vectors in the sample. */ + private long n; + + /** Constructs a VectorialCovariance. + * @param dimension vectors dimension + * @param isBiasCorrected if true, computed the unbiased sample covariance, + * otherwise computes the biased population covariance + */ + public VectorialCovariance(int dimension, boolean isBiasCorrected) { + sums = new double[dimension]; + productsSums = new double[dimension * (dimension + 1) / 2]; + n = 0; + this.isBiasCorrected = isBiasCorrected; + } + + /** + * Add a new vector to the sample. + * @param v vector to add + * @throws DimensionMismatchException if the vector does not have the right dimension + */ + public void increment(double[] v) throws DimensionMismatchException { + if (v.length != sums.length) { + throw new DimensionMismatchException(v.length, sums.length); + } + int k = 0; + for (int i = 0; i < v.length; ++i) { + sums[i] += v[i]; + for (int j = 0; j <= i; ++j) { + productsSums[k++] += v[i] * v[j]; + } + } + n++; + } + + /** + * Get the covariance matrix. 
+ * @return covariance matrix + */ + public RealMatrix getResult() { + + int dimension = sums.length; + RealMatrix result = MatrixUtils.createRealMatrix(dimension, dimension); + + if (n > 1) { + double c = 1.0 / (n * (isBiasCorrected ? (n - 1) : n)); + int k = 0; + for (int i = 0; i < dimension; ++i) { + for (int j = 0; j <= i; ++j) { + double e = c * (n * productsSums[k++] - sums[i] * sums[j]); + result.setEntry(i, j, e); + result.setEntry(j, i, e); + } + } + } + + return result; + + } + + /** + * Get the number of vectors in the sample. + * @return number of vectors in the sample + */ + public long getN() { + return n; + } + + /** + * Clears the internal state of the Statistic + */ + public void clear() { + n = 0; + Arrays.fill(sums, 0.0); + Arrays.fill(productsSums, 0.0); + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (isBiasCorrected ? 1231 : 1237); + result = prime * result + (int) (n ^ (n >>> 32)); + result = prime * result + Arrays.hashCode(productsSums); + result = prime * result + Arrays.hashCode(sums); + return result; + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof VectorialCovariance)) { + return false; + } + VectorialCovariance other = (VectorialCovariance) obj; + if (isBiasCorrected != other.isBiasCorrected) { + return false; + } + if (n != other.n) { + return false; + } + if (!Arrays.equals(productsSums, other.productsSums)) { + return false; + } + if (!Arrays.equals(sums, other.sums)) { + return false; + } + return true; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialMean.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialMean.java new file mode 100644 index 0000000..e06b3bc --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/VectorialMean.java @@ -0,0 +1,105 @@ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.moment; + +import java.io.Serializable; +import java.util.Arrays; + +import org.apache.commons.math3.exception.DimensionMismatchException; + +/** + * Returns the arithmetic mean of the available vectors. + * @since 1.2 + */ +public class VectorialMean implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 8223009086481006892L; + + /** Means for each component. */ + private final Mean[] means; + + /** Constructs a VectorialMean. + * @param dimension vectors dimension + */ + public VectorialMean(int dimension) { + means = new Mean[dimension]; + for (int i = 0; i < dimension; ++i) { + means[i] = new Mean(); + } + } + + /** + * Add a new vector to the sample. + * @param v vector to add + * @throws DimensionMismatchException if the vector does not have the right dimension + */ + public void increment(double[] v) throws DimensionMismatchException { + if (v.length != means.length) { + throw new DimensionMismatchException(v.length, means.length); + } + for (int i = 0; i < v.length; ++i) { + means[i].increment(v[i]); + } + } + + /** + * Get the mean vector. 
+ * @return mean vector + */ + public double[] getResult() { + double[] result = new double[means.length]; + for (int i = 0; i < result.length; ++i) { + result[i] = means[i].getResult(); + } + return result; + } + + /** + * Get the number of vectors in the sample. + * @return number of vectors in the sample + */ + public long getN() { + return (means.length == 0) ? 0 : means[0].getN(); + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(means); + return result; + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof VectorialMean)) { + return false; + } + VectorialMean other = (VectorialMean) obj; + if (!Arrays.equals(means, other.means)) { + return false; + } + return true; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/moment/package-info.java b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/package-info.java new file mode 100644 index 0000000..e23ead7 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/moment/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Summary statistics based on moments. + */ +package org.apache.commons.math3.stat.descriptive.moment; diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/package-info.java b/src/main/java/org/apache/commons/math3/stat/descriptive/package-info.java new file mode 100644 index 0000000..92fa5b3 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/package-info.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * + * Generic univariate summary statistic objects. 
+ * + * <h3>UnivariateStatistic API Usage Examples:</h3> + * + * <h4>UnivariateStatistic:</h4> + * <code>/∗ evaluation approach ∗/<br/> + * double[] values = new double[] { 1, 2, 3, 4, 5 };<br/> + * <span style="font-weight: bold;">UnivariateStatistic stat = new Mean();</span><br/> + * out.println("mean = " + <span style="font-weight: bold;">stat.evaluate(values)</span>);<br/> + * </code> + * + * <h4>StorelessUnivariateStatistic:</h4> + * <code>/∗ incremental approach ∗/<br/> + * double[] values = new double[] { 1, 2, 3, 4, 5 };<br/> + * <span style="font-weight: bold;">StorelessUnivariateStatistic stat = new Mean();</span><br/> + * out.println("mean before adding a value is NaN = " + <span style="font-weight: bold;">stat.getResult()</span>);<br/> + * for (int i = 0; i < values.length; i++) {<br/> + * <span style="font-weight: bold;">stat.increment(values[i]);</span><br/> + * out.println("current mean = " + <span style="font-weight: bold;">stat.getResult()</span>);<br/> + * }<br/> + * <span style="font-weight: bold;"> stat.clear();</span><br/> + * out.println("mean after clear is NaN = " + <span style="font-weight: bold;">stat.getResult()</span>); + * </code> + * + */ +package org.apache.commons.math3.stat.descriptive; diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Max.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Max.java new file mode 100644 index 0000000..75f145f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Max.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the maximum of the available values. + * <p> + * <ul> + * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> + * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> + * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, + * the result is <code>Double.POSITIVE_INFINITY.</code></li> + * </ul></p> +* <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Max extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -5593383832225844641L; + + /** Number of values that have been added */ + private long n; + + /** Current value of the statistic */ + private double value; + + /** + * Create a Max instance + */ + public Max() { + n = 0; + value = Double.NaN; + } + + /** + * Copy constructor, creates a new {@code Max} identical + * to the {@code 
original} + * + * @param original the {@code Max} instance to copy + * @throws NullArgumentException if original is null + */ + public Max(Max original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + if (d > value || Double.isNaN(value)) { + value = d; + } + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + value = Double.NaN; + n = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * Returns the maximum of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null or + * the array index parameters are not valid.</p> + * <p> + * <ul> + * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> + * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> + * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, + * the result is <code>Double.POSITIVE_INFINITY.</code></li> + * </ul></p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the maximum of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + double max = Double.NaN; + if (test(values, begin, length)) { + max = values[begin]; + for (int i = begin; i < begin + length; i++) { + if (!Double.isNaN(values[i])) { + max = (max > values[i]) ? 
max : values[i]; + } + } + } + return max; + } + + /** + * {@inheritDoc} + */ + @Override + public Max copy() { + Max result = new Max(); + // No try-catch or advertised exception because args are non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source Max to copy + * @param dest Max to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Max source, Max dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Median.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Median.java new file mode 100644 index 0000000..6350a0b --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Median.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.ranking.NaNStrategy; +import org.apache.commons.math3.util.KthSelector; + + +/** + * Returns the median of the available values. This is the same as the 50th percentile. + * See {@link Percentile} for a description of the algorithm used. + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Median extends Percentile implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -3961477041290915687L; + + /** Fixed quantile. */ + private static final double FIXED_QUANTILE_50 = 50.0; + + /** + * Default constructor. + */ + public Median() { + // No try-catch or advertised exception - arg is valid + super(FIXED_QUANTILE_50); + } + + /** + * Copy constructor, creates a new {@code Median} identical + * to the {@code original} + * + * @param original the {@code Median} instance to copy + * @throws NullArgumentException if original is null + */ + public Median(Median original) throws NullArgumentException { + super(original); + } + + /** + * Constructs a Median with the specific {@link EstimationType}, {@link NaNStrategy} and {@link PivotingStrategy}. 
+ * + * @param estimationType one of the percentile {@link EstimationType estimation types} + * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs + * @param kthSelector {@link KthSelector} to use for pivoting during search + * @throws MathIllegalArgumentException if p is not within (0,100] + * @throws NullArgumentException if type or NaNStrategy passed is null + */ + private Median(final EstimationType estimationType, final NaNStrategy nanStrategy, + final KthSelector kthSelector) + throws MathIllegalArgumentException { + super(FIXED_QUANTILE_50, estimationType, nanStrategy, kthSelector); + } + + /** {@inheritDoc} */ + @Override + public Median withEstimationType(final EstimationType newEstimationType) { + return new Median(newEstimationType, getNaNStrategy(), getKthSelector()); + } + + /** {@inheritDoc} */ + @Override + public Median withNaNStrategy(final NaNStrategy newNaNStrategy) { + return new Median(getEstimationType(), newNaNStrategy, getKthSelector()); + } + + /** {@inheritDoc} */ + @Override + public Median withKthSelector(final KthSelector newKthSelector) { + return new Median(getEstimationType(), getNaNStrategy(), newKthSelector); + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Min.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Min.java new file mode 100644 index 0000000..c87e6f1 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Min.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the minimum of the available values. + * <p> + * <ul> + * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> + * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> + * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, + * the result is <code>Double.NEGATIVE_INFINITY.</code></li> + * </ul></p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Min extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -2941995784909003131L; + + /**Number of values that have been added */ + private long n; + + /**Current value of the statistic */ + private double value; + + /** + * Create a Min instance + */ + public Min() { + n = 0; + value = Double.NaN; + } + + /** + * Copy constructor, creates a new {@code Min} identical + * to the {@code 
original} + * + * @param original the {@code Min} instance to copy + * @throws NullArgumentException if original is null + */ + public Min(Min original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + if (d < value || Double.isNaN(value)) { + value = d; + } + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + value = Double.NaN; + n = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * Returns the minimum of the entries in the specified portion of + * the input array, or <code>Double.NaN</code> if the designated subarray + * is empty. + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null or + * the array index parameters are not valid.</p> + * <p> + * <ul> + * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> + * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> + * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, + * the result is <code>Double.NEGATIVE_INFINITY.</code></li> + * </ul> </p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the minimum of the values or Double.NaN if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values,final int begin, final int length) + throws MathIllegalArgumentException { + double min = Double.NaN; + if (test(values, begin, length)) { + min = values[begin]; + for (int i = begin; i < begin + length; i++) { + if (!Double.isNaN(values[i])) { + min = (min < values[i]) ? 
min : values[i]; + } + } + } + return min; + } + + /** + * {@inheritDoc} + */ + @Override + public Min copy() { + Min result = new Min(); + // No try-catch or advertised exception - args are non-null + copy(this, result); + return result; + } + + /** + * Copies source to dest. + * <p>Neither source nor dest can be null.</p> + * + * @param source Min to copy + * @param dest Min to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Min source, Min dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java new file mode 100644 index 0000000..b8bc274 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/PSquarePercentile.java @@ -0,0 +1,997 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.Serializable; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.analysis.interpolation.LinearInterpolator; +import org.apache.commons.math3.analysis.interpolation.NevilleInterpolator; +import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator; +import org.apache.commons.math3.exception.InsufficientDataException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.Precision; + +/** + * A {@link StorelessUnivariateStatistic} estimating percentiles using the + * <a href=http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf>P<SUP>2</SUP></a> + * Algorithm as explained by <a href=http://www.cse.wustl.edu/~jain/>Raj + * Jain</a> and Imrich Chlamtac in + * <a href=http://www.cse.wustl.edu/~jain/papers/psqr.htm>P<SUP>2</SUP> Algorithm + * for Dynamic Calculation of Quantiles and Histogram Without Storing + * Observations</a>. + * <p> + * Note: This implementation is not synchronized and produces an approximate + * result.
For small samples, where data can be stored and processed in memory, + * {@link Percentile} should be used.</p> + * + */ +public class PSquarePercentile extends AbstractStorelessUnivariateStatistic + implements StorelessUnivariateStatistic, Serializable { + + /** + * The maximum array size used for psquare algorithm + */ + private static final int PSQUARE_CONSTANT = 5; + + /** + * A Default quantile needed in case if user prefers to use default no + * argument constructor. + */ + private static final double DEFAULT_QUANTILE_DESIRED = 50d; + + /** + * Serial ID + */ + private static final long serialVersionUID = 2283912083175715479L; + + /** + * A decimal formatter for print convenience + */ + private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat( + "00.00"); + + /** + * Initial list of 5 numbers corresponding to 5 markers. <b>NOTE:</b>watch + * out for the add methods that are overloaded + */ + private final List<Double> initialFive = new FixedCapacityList<Double>( + PSQUARE_CONSTANT); + + /** + * The quantile needed should be in range of 0-1. The constructor + * {@link #PSquarePercentile(double)} ensures that passed in percentile is + * divided by 100. + */ + private final double quantile; + + /** + * lastObservation is the last observation value/input sample. No need to + * serialize + */ + private transient double lastObservation; + + /** + * Markers is the marker collection object which comes to effect + * only after 5 values are inserted + */ + private PSquareMarkers markers = null; + + /** + * Computed p value (i,e percentile value of data set hither to received) + */ + private double pValue = Double.NaN; + + /** + * Counter to count the values/observations accepted into this data set + */ + private long countOfObservations; + + /** + * Constructs a PSquarePercentile with the specific percentile value. 
+ * @param p the percentile + * @throws OutOfRangeException if p is not greater than 0 and less + * than or equal to 100 + */ + public PSquarePercentile(final double p) { + if (p > 100 || p < 0) { + throw new OutOfRangeException(LocalizedFormats.OUT_OF_RANGE, + p, 0, 100); + } + this.quantile = p / 100d;// always set it within (0,1] + } + + /** + * Default constructor that assumes a {@link #DEFAULT_QUANTILE_DESIRED + * default quantile} needed + */ + PSquarePercentile() { + this(DEFAULT_QUANTILE_DESIRED); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + double result = getResult(); + result = Double.isNaN(result) ? 37 : result; + final double markersHash = markers == null ? 0 : markers.hashCode(); + final double[] toHash = {result, quantile, markersHash, countOfObservations}; + return Arrays.hashCode(toHash); + } + + /** + * Returns true iff {@code o} is a {@code PSquarePercentile} returning the + * same values as this for {@code getResult()} and {@code getN()} and also + * having equal markers + * + * @param o object to compare + * @return true if {@code o} is a {@code PSquarePercentile} with + * equivalent internal state + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof PSquarePercentile) { + PSquarePercentile that = (PSquarePercentile) o; + boolean isNotNull = markers != null && that.markers != null; + boolean isNull = markers == null && that.markers == null; + result = isNotNull ? markers.equals(that.markers) : isNull; + // markers as in the case of first + // five observations + result = result && getN() == that.getN(); + } + return result; + } + + /** + * {@inheritDoc}The internal state updated due to the new value in this + * context is basically of the marker positions and computation of the + * approximate quantile. + * + * @param observation the observation currently being added. 
+ */ + @Override + public void increment(final double observation) { + // Increment counter + countOfObservations++; + + // Store last observation + this.lastObservation = observation; + + // 0. Use Brute force for <5 + if (markers == null) { + if (initialFive.add(observation)) { + Collections.sort(initialFive); + pValue = + initialFive + .get((int) (quantile * (initialFive.size() - 1))); + return; + } + // 1. Initialize once after 5th observation + markers = newMarkers(initialFive, quantile); + } + // 2. process a Data Point and return pValue + pValue = markers.processDataPoint(observation); + } + + /** + * Returns a string containing the last observation, the current estimate + * of the quantile and all markers. + * + * @return string representation of state data + */ + @Override + public String toString() { + + if (markers == null) { + return String.format("obs=%s pValue=%s", + DECIMAL_FORMAT.format(lastObservation), + DECIMAL_FORMAT.format(pValue)); + } else { + return String.format("obs=%s markers=%s", + DECIMAL_FORMAT.format(lastObservation), markers.toString()); + } + } + + /** + * {@inheritDoc} + */ + public long getN() { + return countOfObservations; + } + + /** + * {@inheritDoc} + */ + @Override + public StorelessUnivariateStatistic copy() { + // multiply quantile by 100 now as anyway constructor divides it by 100 + PSquarePercentile copy = new PSquarePercentile(100d * quantile); + + if (markers != null) { + copy.markers = (PSquareMarkers) markers.clone(); + } + copy.countOfObservations = countOfObservations; + copy.pValue = pValue; + copy.initialFive.clear(); + copy.initialFive.addAll(initialFive); + return copy; + } + + /** + * Returns the quantile estimated by this statistic in the range [0.0-1.0] + * + * @return quantile estimated by {@link #getResult()} + */ + public double quantile() { + return quantile; + } + + /** + * {@inheritDoc}. This basically clears all the markers, the + * initialFive list and sets countOfObservations to 0. 
+ */ + @Override + public void clear() { + markers = null; + initialFive.clear(); + countOfObservations = 0L; + pValue = Double.NaN; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + if (Double.compare(quantile, 1d) == 0) { + pValue = maximum(); + } else if (Double.compare(quantile, 0d) == 0) { + pValue = minimum(); + } + return pValue; + } + + /** + * @return maximum in the data set added to this statistic + */ + private double maximum() { + double val = Double.NaN; + if (markers != null) { + val = markers.height(PSQUARE_CONSTANT); + } else if (!initialFive.isEmpty()) { + val = initialFive.get(initialFive.size() - 1); + } + return val; + } + + /** + * @return minimum in the data set added to this statistic + */ + private double minimum() { + double val = Double.NaN; + if (markers != null) { + val = markers.height(1); + } else if (!initialFive.isEmpty()) { + val = initialFive.get(0); + } + return val; + } + + /** + * Markers is an encapsulation of the five markers/buckets as indicated in + * the original works. + */ + private static class Markers implements PSquareMarkers, Serializable { + /** + * Serial version id + */ + private static final long serialVersionUID = 1L; + + /** Low marker index */ + private static final int LOW = 2; + + /** High marker index */ + private static final int HIGH = 4; + + /** + * Array of 5+1 Markers (The first marker is dummy just so we + * can match the rest of indexes [1-5] indicated in the original works + * which follows unit based index) + */ + private final Marker[] markerArray; + + /** + * Kth cell belonging to [1-5] of the markerArray. 
No need for + * this to be serialized + */ + private transient int k = -1; + + /** + * Constructor + * + * @param theMarkerArray marker array to be used + */ + private Markers(final Marker[] theMarkerArray) { + MathUtils.checkNotNull(theMarkerArray); + markerArray = theMarkerArray; + for (int i = 1; i < PSQUARE_CONSTANT; i++) { + markerArray[i].previous(markerArray[i - 1]) + .next(markerArray[i + 1]).index(i); + } + markerArray[0].previous(markerArray[0]).next(markerArray[1]) + .index(0); + markerArray[5].previous(markerArray[4]).next(markerArray[5]) + .index(5); + } + + /** + * Constructor + * + * @param initialFive elements required to build Marker + * @param p quantile required to be computed + */ + private Markers(final List<Double> initialFive, final double p) { + this(createMarkerArray(initialFive, p)); + } + + /** + * Creates a marker array using initial five elements and a quantile + * + * @param initialFive list of initial five elements + * @param p the pth quantile + * @return Marker array + */ + private static Marker[] createMarkerArray( + final List<Double> initialFive, final double p) { + final int countObserved = + initialFive == null ? -1 : initialFive.size(); + if (countObserved < PSQUARE_CONSTANT) { + throw new InsufficientDataException( + LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, + countObserved, PSQUARE_CONSTANT); + } + Collections.sort(initialFive); + return new Marker[] { + new Marker(),// Null Marker + new Marker(initialFive.get(0), 1, 0, 1), + new Marker(initialFive.get(1), 1 + 2 * p, p / 2, 2), + new Marker(initialFive.get(2), 1 + 4 * p, p, 3), + new Marker(initialFive.get(3), 3 + 2 * p, (1 + p) / 2, 4), + new Marker(initialFive.get(4), 5, 1, 5) }; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + return Arrays.deepHashCode(markerArray); + } + + /** + * {@inheritDoc}.This equals method basically checks for marker array to + * be deep equals. 
+ * + * @param o is the other object + * @return true if the object compares with this object are equivalent + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof Markers) { + Markers that = (Markers) o; + result = Arrays.deepEquals(markerArray, that.markerArray); + } + return result; + } + + /** + * Process a data point + * + * @param inputDataPoint is the data point passed + * @return computed percentile + */ + public double processDataPoint(final double inputDataPoint) { + + // 1. Find cell and update minima and maxima + final int kthCell = findCellAndUpdateMinMax(inputDataPoint); + + // 2. Increment positions + incrementPositions(1, kthCell + 1, 5); + + // 2a. Update desired position with increments + updateDesiredPositions(); + + // 3. Adjust heights of m[2-4] if necessary + adjustHeightsOfMarkers(); + + // 4. Return percentile + return getPercentileValue(); + } + + /** + * Returns the percentile computed thus far. + * + * @return height of mid point marker + */ + public double getPercentileValue() { + return height(3); + } + + /** + * Finds the cell where the input observation / value fits. + * + * @param observation the input value to be checked for + * @return kth cell (of the markers ranging from 1-5) where observed + * sample fits + */ + private int findCellAndUpdateMinMax(final double observation) { + k = -1; + if (observation < height(1)) { + markerArray[1].markerHeight = observation; + k = 1; + } else if (observation < height(2)) { + k = 1; + } else if (observation < height(3)) { + k = 2; + } else if (observation < height(4)) { + k = 3; + } else if (observation <= height(5)) { + k = 4; + } else { + markerArray[5].markerHeight = observation; + k = 4; + } + return k; + } + + /** + * Adjust marker heights by setting quantile estimates to middle markers. 
+ */ + private void adjustHeightsOfMarkers() { + for (int i = LOW; i <= HIGH; i++) { + estimate(i); + } + } + + /** + * {@inheritDoc} + */ + public double estimate(final int index) { + if (index < LOW || index > HIGH) { + throw new OutOfRangeException(index, LOW, HIGH); + } + return markerArray[index].estimate(); + } + + /** + * Increment positions by d. Refer to algorithm paper for the + * definition of d. + * + * @param d The increment value for the position + * @param startIndex start index of the marker array + * @param endIndex end index of the marker array + */ + private void incrementPositions(final int d, final int startIndex, + final int endIndex) { + for (int i = startIndex; i <= endIndex; i++) { + markerArray[i].incrementPosition(d); + } + } + + /** + * Desired positions incremented by bucket width. The bucket width is + * basically the desired increments. + */ + private void updateDesiredPositions() { + for (int i = 1; i < markerArray.length; i++) { + markerArray[i].updateDesiredPosition(); + } + } + + /** + * Sets previous and next markers after default read is done. 
+ * + * @param anInputStream the input stream to be deserialized + * @throws ClassNotFoundException thrown when a desired class not found + * @throws IOException thrown due to any io errors + */ + private void readObject(ObjectInputStream anInputStream) + throws ClassNotFoundException, IOException { + // always perform the default de-serialization first + anInputStream.defaultReadObject(); + // Build links + for (int i = 1; i < PSQUARE_CONSTANT; i++) { + markerArray[i].previous(markerArray[i - 1]) + .next(markerArray[i + 1]).index(i); + } + markerArray[0].previous(markerArray[0]).next(markerArray[1]) + .index(0); + markerArray[5].previous(markerArray[4]).next(markerArray[5]) + .index(5); + } + + /** + * Return marker height given index + * + * @param markerIndex index of marker within (1,6) + * @return marker height + */ + public double height(final int markerIndex) { + if (markerIndex >= markerArray.length || markerIndex <= 0) { + throw new OutOfRangeException(markerIndex, 1, + markerArray.length); + } + return markerArray[markerIndex].markerHeight; + } + + /** + * {@inheritDoc}.Clone Markers + * + * @return cloned object + */ + @Override + public Object clone() { + return new Markers(new Marker[] { new Marker(), + (Marker) markerArray[1].clone(), + (Marker) markerArray[2].clone(), + (Marker) markerArray[3].clone(), + (Marker) markerArray[4].clone(), + (Marker) markerArray[5].clone() }); + + } + + /** + * Returns string representation of the Marker array. 
+ * + * @return Markers as a string + */ + @Override + public String toString() { + return String.format("m1=[%s],m2=[%s],m3=[%s],m4=[%s],m5=[%s]", + markerArray[1].toString(), markerArray[2].toString(), + markerArray[3].toString(), markerArray[4].toString(), + markerArray[5].toString()); + } + + } + + /** + * The class modeling the attributes of the marker of the P-square algorithm + */ + private static class Marker implements Serializable, Cloneable { + + /** + * Serial Version ID + */ + private static final long serialVersionUID = -3575879478288538431L; + + /** + * The marker index which is just a serial number for the marker in the + * marker array of 5+1. + */ + private int index; + + /** + * The integral marker position. Refer to the variable n in the original + * works. + */ + private double intMarkerPosition; + + /** + * Desired marker position. Refer to the variable n' in the original + * works. + */ + private double desiredMarkerPosition; + + /** + * Marker height or the quantile. Refer to the variable q in the + * original works. + */ + private double markerHeight; + + /** + * Desired marker increment. Refer to the variable dn' in the original + * works. + */ + private double desiredMarkerIncrement; + + /** + * Next and previous markers for easy linked navigation in loops. this + * is not serialized as they can be rebuilt during deserialization. 
+ */ + private transient Marker next; + + /** + * The previous marker links + */ + private transient Marker previous; + + /** + * Nonlinear interpolator + */ + private final UnivariateInterpolator nonLinear = + new NevilleInterpolator(); + + /** + * Linear interpolator which is not serializable + */ + private transient UnivariateInterpolator linear = + new LinearInterpolator(); + + /** + * Default constructor + */ + private Marker() { + this.next = this.previous = this; + } + + /** + * Constructor of the marker with parameters + * + * @param heightOfMarker represent the quantile value + * @param makerPositionDesired represent the desired marker position + * @param markerPositionIncrement represent increments for position + * @param markerPositionNumber represent the position number of marker + */ + private Marker(double heightOfMarker, double makerPositionDesired, + double markerPositionIncrement, double markerPositionNumber) { + this(); + this.markerHeight = heightOfMarker; + this.desiredMarkerPosition = makerPositionDesired; + this.desiredMarkerIncrement = markerPositionIncrement; + this.intMarkerPosition = markerPositionNumber; + } + + /** + * Sets the previous marker. + * + * @param previousMarker the previous marker to the current marker in + * the array of markers + * @return this instance + */ + private Marker previous(final Marker previousMarker) { + MathUtils.checkNotNull(previousMarker); + this.previous = previousMarker; + return this; + } + + /** + * Sets the next marker. + * + * @param nextMarker the next marker to the current marker in the array + * of markers + * @return this instance + */ + private Marker next(final Marker nextMarker) { + MathUtils.checkNotNull(nextMarker); + this.next = nextMarker; + return this; + } + + /** + * Sets the index of the marker. 
+ * + * @param indexOfMarker the array index of the marker in marker array + * @return this instance + */ + private Marker index(final int indexOfMarker) { + this.index = indexOfMarker; + return this; + } + + /** + * Update desired Position with increment. + */ + private void updateDesiredPosition() { + desiredMarkerPosition += desiredMarkerIncrement; + } + + /** + * Increment Position by d. + * + * @param d a delta value to increment + */ + private void incrementPosition(final int d) { + intMarkerPosition += d; + } + + /** + * Difference between desired and actual position + * + * @return difference between desired and actual position + */ + private double difference() { + return desiredMarkerPosition - intMarkerPosition; + } + + /** + * Estimate the quantile for the current marker. + * + * @return estimated quantile + */ + private double estimate() { + final double di = difference(); + final boolean isNextHigher = + next.intMarkerPosition - intMarkerPosition > 1; + final boolean isPreviousLower = + previous.intMarkerPosition - intMarkerPosition < -1; + + if (di >= 1 && isNextHigher || di <= -1 && isPreviousLower) { + final int d = di >= 0 ? 1 : -1; + final double[] xval = + new double[] { previous.intMarkerPosition, + intMarkerPosition, next.intMarkerPosition }; + final double[] yval = + new double[] { previous.markerHeight, markerHeight, + next.markerHeight }; + final double xD = intMarkerPosition + d; + + UnivariateFunction univariateFunction = + nonLinear.interpolate(xval, yval); + markerHeight = univariateFunction.value(xD); + + // If parabolic estimate is bad then turn linear + if (isEstimateBad(yval, markerHeight)) { + int delta = xD - xval[1] > 0 ? 
1 : -1; + final double[] xBad = + new double[] { xval[1], xval[1 + delta] }; + final double[] yBad = + new double[] { yval[1], yval[1 + delta] }; + MathArrays.sortInPlace(xBad, yBad);// since d can be +/- 1 + univariateFunction = linear.interpolate(xBad, yBad); + markerHeight = univariateFunction.value(xD); + } + incrementPosition(d); + } + return markerHeight; + } + + /** + * Check if parabolic/nonlinear estimate is bad by checking if the + * ordinate found is beyond the y[0] and y[2]. + * + * @param y the array to get the bounds + * @param yD the estimate + * @return true if yD is a bad estimate + */ + private boolean isEstimateBad(final double[] y, final double yD) { + return yD <= y[0] || yD >= y[2]; + } + + /** + * {@inheritDoc}<i>This equals method checks for marker attributes and + * as well checks if navigation pointers (next and previous) are the same + * between this and passed in object</i> + * + * @param o Other object + * @return true if this equals passed in other object o + */ + @Override + public boolean equals(Object o) { + boolean result = false; + if (this == o) { + result = true; + } else if (o != null && o instanceof Marker) { + Marker that = (Marker) o; + + result = Double.compare(markerHeight, that.markerHeight) == 0; + result = + result && + Double.compare(intMarkerPosition, + that.intMarkerPosition) == 0; + result = + result && + Double.compare(desiredMarkerPosition, + that.desiredMarkerPosition) == 0; + result = + result && + Double.compare(desiredMarkerIncrement, + that.desiredMarkerIncrement) == 0; + + result = result && next.index == that.next.index; + result = result && previous.index == that.previous.index; + } + return result; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + return Arrays.hashCode(new double[] {markerHeight, intMarkerPosition, + desiredMarkerIncrement, desiredMarkerPosition, previous.index, next.index}); + } + + /** + * Read Object to deserialize. 
+         *
+         * @param anInstream Stream Object data
+         * @throws IOException thrown for IO Errors
+         * @throws ClassNotFoundException thrown for class not being found
+         */
+        private void readObject(ObjectInputStream anInstream)
+                throws ClassNotFoundException, IOException {
+            anInstream.defaultReadObject();
+            // the transient links are not part of the stream: point them at
+            // this marker for now
+            previous=next=this;
+            // the linear interpolator is transient as well, so re-create it
+            linear = new LinearInterpolator();
+        }
+
+        /**
+         * Clone this instance. The clone is built from the marker height,
+         * the desired position, the desired increment and the position of
+         * this marker; the index and the previous/next links are not passed
+         * on to it.
+         *
+         * @return cloned marker
+         */
+        @Override
+        public Object clone() {
+            return new Marker(markerHeight, desiredMarkerPosition,
+                    desiredMarkerIncrement, intMarkerPosition);
+        }
+
+        /**
+         * Returns a one-line summary of this marker: its index, its position
+         * (n), desired position (np), height (q) and desired increment (dn),
+         * followed by the indexes of the previous and next markers.
+         *
+         * @return string representation of this marker
+         */
+        @Override
+        public String toString() {
+            return String.format(
+                    "index=%.0f,n=%.0f,np=%.2f,q=%.2f,dn=%.2f,prev=%d,next=%d",
+                    (double) index, Precision.round(intMarkerPosition, 0),
+                    Precision.round(desiredMarkerPosition, 2),
+                    Precision.round(markerHeight, 2),
+                    Precision.round(desiredMarkerIncrement, 2), previous.index,
+                    next.index);
+        }
+    }
+
+    /**
+     * A simple fixed capacity list that has an upper bound to growth.
+     * Once its capacity is reached, {@code add} is a no-op, returning
+     * {@code false}.
+     *
+     * @param <E> type of the elements held in this list
+     */
+    private static class FixedCapacityList<E> extends ArrayList<E> implements
+            Serializable {
+        /**
+         * Serialization Version Id
+         */
+        private static final long serialVersionUID = 2283952083075725479L;
+        /**
+         * Capacity of the list
+         */
+        private final int capacity;
+
+        /**
+         * This constructor constructs the list with given capacity and as well
+         * as stores the capacity
+         *
+         * @param fixedCapacity the capacity to be fixed for this list
+         */
+        FixedCapacityList(final int fixedCapacity) {
+            super(fixedCapacity);
+            this.capacity = fixedCapacity;
+        }
+
+        /**
+         * {@inheritDoc} In addition it checks if the {@link #size()} returns a
+         * size that is within capacity and if true it adds; otherwise the list
+         * contents are unchanged and {@code false} is returned.
+ * + * @return true if addition is successful and false otherwise + */ + @Override + public boolean add(final E e) { + return size() < capacity ? super.add(e) : false; + } + + /** + * {@inheritDoc} In addition it checks if the sum of Collection size and + * this instance's {@link #size()} returns a value that is within + * capacity and if true it adds the collection; otherwise the list + * contents are unchanged and {@code false} is returned. + * + * @return true if addition is successful and false otherwise + */ + @Override + public boolean addAll(Collection<? extends E> collection) { + boolean isCollectionLess = + collection != null && + collection.size() + size() <= capacity; + return isCollectionLess ? super.addAll(collection) : false; + } + } + + /** + * A creation method to build Markers + * + * @param initialFive list of initial five elements + * @param p the quantile desired + * @return an instance of PSquareMarkers + */ + public static PSquareMarkers newMarkers(final List<Double> initialFive, + final double p) { + return new Markers(initialFive, p); + } + + /** + * An interface that encapsulates abstractions of the + * P-square algorithm markers as is explained in the original works. This + * interface is exposed with protected access to help in testability. + */ + protected interface PSquareMarkers extends Cloneable { + /** + * Returns Percentile value computed thus far. + * + * @return percentile + */ + double getPercentileValue(); + + /** + * A clone function to clone the current instance. It's created as an + * interface method as well for convenience though Cloneable is just a + * marker interface. + * + * @return clone of this instance + */ + Object clone(); + + /** + * Returns the marker height (or percentile) of a given marker index. 
+ * + * @param markerIndex is the index of marker in the marker array + * @return percentile value of the marker index passed + * @throws OutOfRangeException in case the index is not within [1-5] + */ + double height(final int markerIndex); + + /** + * Process a data point by moving the marker heights based on estimator. + * + * @param inputDataPoint is the data point passed + * @return computed percentile + */ + double processDataPoint(final double inputDataPoint); + + /** + * An Estimate of the percentile value of a given Marker + * + * @param index the marker's index in the array of markers + * @return percentile estimate + * @throws OutOfRangeException in case if index is not within [1-5] + */ + double estimate(final int index); + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Percentile.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Percentile.java new file mode 100644 index 0000000..bba9e7c --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/Percentile.java @@ -0,0 +1,1072 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.rank; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.BitSet; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.MathUnsupportedOperationException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.stat.descriptive.AbstractUnivariateStatistic; +import org.apache.commons.math3.stat.ranking.NaNStrategy; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.KthSelector; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.math3.util.MathUtils; +import org.apache.commons.math3.util.MedianOf3PivotingStrategy; +import org.apache.commons.math3.util.PivotingStrategyInterface; +import org.apache.commons.math3.util.Precision; + +/** + * Provides percentile computation. + * <p> + * There are several commonly used methods for estimating percentiles (a.k.a. + * quantiles) based on sample data. For large samples, the different methods + * agree closely, but when sample sizes are small, different methods will give + * significantly different results. The algorithm implemented here works as follows: + * <ol> + * <li>Let <code>n</code> be the length of the (sorted) array and + * <code>0 < p <= 100</code> be the desired percentile.</li> + * <li>If <code> n = 1 </code> return the unique array element (regardless of + * the value of <code>p</code>); otherwise </li> + * <li>Compute the estimated percentile position + * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> + * between <code>pos</code> and <code>floor(pos)</code> (i.e. 
the fractional + * part of <code>pos</code>).</li> + * <li> If <code>pos < 1</code> return the smallest element in the array.</li> + * <li> Else if <code>pos >= n</code> return the largest element in the array.</li> + * <li> Else let <code>lower</code> be the element in position + * <code>floor(pos)</code> in the array and let <code>upper</code> be the + * next element in the array. Return <code>lower + d * (upper - lower)</code> + * </li> + * </ol></p> + * <p> + * To compute percentiles, the data must be at least partially ordered. Input + * arrays are copied and recursively partitioned using an ordering definition. + * The ordering used by <code>Arrays.sort(double[])</code> is the one determined + * by {@link java.lang.Double#compareTo(Double)}. This ordering makes + * <code>Double.NaN</code> larger than any other value (including + * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median + * (50th percentile) of + * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p> + * <p> + * Since percentile estimation usually involves interpolation between array + * elements, arrays containing <code>NaN</code> or infinite values will often + * result in <code>NaN</code> or infinite values returned.</p> + * <p> + * Further, to include different estimation types such as R1, R2 as mentioned in + * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>, + * a type specific NaN handling strategy is used to closely match with the + * typically observed results from popular tools like R(R1-R9), Excel(R7).</p> + * <p> + * Since 2.2, Percentile uses only selection instead of complete sorting + * and caches selection algorithm state between calls to the various + * {@code evaluate} methods. This greatly improves efficiency, both for a single + * percentile and multiple percentile computations. 
To maximize performance when + * multiple percentiles are computed based on the same data, users should set the + * data array once using either one of the {@link #evaluate(double[], double)} or + * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)} + * with just the percentile provided. + * </p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Percentile extends AbstractUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -8091216485095130416L; + + /** Maximum number of partitioning pivots cached (each level double the number of pivots). */ + private static final int MAX_CACHED_LEVELS = 10; + + /** Maximum number of cached pivots in the pivots cached array */ + private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1; + + /** Default KthSelector used with default pivoting strategy */ + private final KthSelector kthSelector; + + /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */ + private final EstimationType estimationType; + + /** NaN Handling of the input as defined by {@link NaNStrategy} */ + private final NaNStrategy nanStrategy; + + /** Determines what percentile is computed when evaluate() is activated + * with no quantile argument */ + private double quantile; + + /** Cached pivots. */ + private int[] cachedPivots; + + /** + * Constructs a Percentile with the following defaults. 
+ * <ul> + * <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li> + * <li>default estimation type: {@link EstimationType#LEGACY}, + * can be reset with {@link #withEstimationType(EstimationType)}</li> + * <li>default NaN strategy: {@link NaNStrategy#REMOVED}, + * can be reset with {@link #withNaNStrategy(NaNStrategy)}</li> + * <li>a KthSelector that makes use of {@link MedianOf3PivotingStrategy}, + * can be reset with {@link #withKthSelector(KthSelector)}</li> + * </ul> + */ + public Percentile() { + // No try-catch or advertised exception here - arg is valid + this(50.0); + } + + /** + * Constructs a Percentile with the specific quantile value and the following + * <ul> + * <li>default method type: {@link EstimationType#LEGACY}</li> + * <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li> + * <li>a Kth Selector : {@link KthSelector}</li> + * </ul> + * @param quantile the quantile + * @throws MathIllegalArgumentException if p is not greater than 0 and less + * than or equal to 100 + */ + public Percentile(final double quantile) throws MathIllegalArgumentException { + this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED, + new KthSelector(new MedianOf3PivotingStrategy())); + } + + /** + * Copy constructor, creates a new {@code Percentile} identical + * to the {@code original} + * + * @param original the {@code Percentile} instance to copy + * @throws NullArgumentException if original is null + */ + public Percentile(final Percentile original) throws NullArgumentException { + + MathUtils.checkNotNull(original); + estimationType = original.getEstimationType(); + nanStrategy = original.getNaNStrategy(); + kthSelector = original.getKthSelector(); + + setData(original.getDataRef()); + if (original.cachedPivots != null) { + System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length); + } + setQuantile(original.quantile); + + } + + /** + * Constructs a Percentile with the specific quantile value, + * 
{@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}. + * + * @param quantile the quantile to be computed + * @param estimationType one of the percentile {@link EstimationType estimation types} + * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs + * @param kthSelector a {@link KthSelector} to use for pivoting during search + * @throws MathIllegalArgumentException if p is not within (0,100] + * @throws NullArgumentException if type or NaNStrategy passed is null + */ + protected Percentile(final double quantile, + final EstimationType estimationType, + final NaNStrategy nanStrategy, + final KthSelector kthSelector) + throws MathIllegalArgumentException { + setQuantile(quantile); + cachedPivots = null; + MathUtils.checkNotNull(estimationType); + MathUtils.checkNotNull(nanStrategy); + MathUtils.checkNotNull(kthSelector); + this.estimationType = estimationType; + this.nanStrategy = nanStrategy; + this.kthSelector = kthSelector; + } + + /** {@inheritDoc} */ + @Override + public void setData(final double[] values) { + if (values == null) { + cachedPivots = null; + } else { + cachedPivots = new int[PIVOTS_HEAP_LENGTH]; + Arrays.fill(cachedPivots, -1); + } + super.setData(values); + } + + /** {@inheritDoc} */ + @Override + public void setData(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + if (values == null) { + cachedPivots = null; + } else { + cachedPivots = new int[PIVOTS_HEAP_LENGTH]; + Arrays.fill(cachedPivots, -1); + } + super.setData(values, begin, length); + } + + /** + * Returns the result of evaluating the statistic over the stored data. 
+ * <p> + * The stored array is the one which was set by previous calls to + * {@link #setData(double[])} + * </p> + * @param p the percentile value to compute + * @return the value of the statistic applied to the stored data + * @throws MathIllegalArgumentException if p is not a valid quantile value + * (p must be greater than 0 and less than or equal to 100) + */ + public double evaluate(final double p) throws MathIllegalArgumentException { + return evaluate(getDataRef(), p); + } + + /** + * Returns an estimate of the <code>p</code>th percentile of the values + * in the <code>values</code> array. + * <p> + * Calls to this method do not modify the internal <code>quantile</code> + * state of this statistic.</p> + * <p> + * <ul> + * <li>Returns <code>Double.NaN</code> if <code>values</code> has length + * <code>0</code></li> + * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> + * if <code>values</code> has length <code>1</code></li> + * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> + * is null or p is not a valid quantile value (p must be greater than 0 + * and less than or equal to 100) </li> + * </ul></p> + * <p> + * See {@link Percentile} for a description of the percentile estimation + * algorithm used.</p> + * + * @param values input array of values + * @param p the percentile value to compute + * @return the percentile value or Double.NaN if the array is empty + * @throws MathIllegalArgumentException if <code>values</code> is null + * or p is invalid + */ + public double evaluate(final double[] values, final double p) + throws MathIllegalArgumentException { + test(values, 0, 0); + return evaluate(values, 0, values.length, p); + } + + /** + * Returns an estimate of the <code>quantile</code>th percentile of the + * designated values in the <code>values</code> array. The quantile + * estimated is determined by the <code>quantile</code> property. 
+ * <p> + * <ul> + * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> + * <li>Returns (for any value of <code>quantile</code>) + * <code>values[begin]</code> if <code>length = 1 </code></li> + * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> + * is null, or <code>start</code> or <code>length</code> is invalid</li> + * </ul></p> + * <p> + * See {@link Percentile} for a description of the percentile estimation + * algorithm used.</p> + * + * @param values the input array + * @param start index of the first array element to include + * @param length the number of elements to include + * @return the percentile value + * @throws MathIllegalArgumentException if the parameters are not valid + * + */ + @Override + public double evaluate(final double[] values, final int start, final int length) + throws MathIllegalArgumentException { + return evaluate(values, start, length, quantile); + } + + /** + * Returns an estimate of the <code>p</code>th percentile of the values + * in the <code>values</code> array, starting with the element in (0-based) + * position <code>begin</code> in the array and including <code>length</code> + * values. 
+ * <p> + * Calls to this method do not modify the internal <code>quantile</code> + * state of this statistic.</p> + * <p> + * <ul> + * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> + * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> + * if <code>length = 1 </code></li> + * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code> + * is null , <code>begin</code> or <code>length</code> is invalid, or + * <code>p</code> is not a valid quantile value (p must be greater than 0 + * and less than or equal to 100)</li> + * </ul></p> + * <p> + * See {@link Percentile} for a description of the percentile estimation + * algorithm used.</p> + * + * @param values array of input values + * @param p the percentile to compute + * @param begin the first (0-based) element to include in the computation + * @param length the number of array elements to include + * @return the percentile value + * @throws MathIllegalArgumentException if the parameters are not valid or the + * input array is null + */ + public double evaluate(final double[] values, final int begin, + final int length, final double p) + throws MathIllegalArgumentException { + + test(values, begin, length); + if (p > 100 || p <= 0) { + throw new OutOfRangeException( + LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); + } + if (length == 0) { + return Double.NaN; + } + if (length == 1) { + return values[begin]; // always return single value for n = 1 + } + + final double[] work = getWorkArray(values, begin, length); + final int[] pivotsHeap = getPivots(values); + return work.length == 0 ? Double.NaN : + estimationType.evaluate(work, pivotsHeap, p, kthSelector); + } + + /** Select a pivot index as the median of three + * <p> + * <b>Note:</b> With the effect of allowing {@link KthSelector} to be set on + * {@link Percentile} instances(thus indirectly {@link PivotingStrategy}) + * this method wont take effect any more and hence is unsupported. 
+ * @param work data array + * @param begin index of the first element of the slice + * @param end index after the last element of the slice + * @return the index of the median element chosen between the + * first, the middle and the last element of the array slice + * @deprecated Please refrain from using this method (as it wont take effect) + * and instead use {@link Percentile#withKthSelector(newKthSelector)} if + * required. + * + */ + @Deprecated + int medianOf3(final double[] work, final int begin, final int end) { + return new MedianOf3PivotingStrategy().pivotIndex(work, begin, end); + //throw new MathUnsupportedOperationException(); + } + + /** + * Returns the value of the quantile field (determines what percentile is + * computed when evaluate() is called with no quantile argument). + * + * @return quantile set while construction or {@link #setQuantile(double)} + */ + public double getQuantile() { + return quantile; + } + + /** + * Sets the value of the quantile field (determines what percentile is + * computed when evaluate() is called with no quantile argument). + * + * @param p a value between 0 < p <= 100 + * @throws MathIllegalArgumentException if p is not greater than 0 and less + * than or equal to 100 + */ + public void setQuantile(final double p) throws MathIllegalArgumentException { + if (p <= 0 || p > 100) { + throw new OutOfRangeException( + LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); + } + quantile = p; + } + + /** + * {@inheritDoc} + */ + @Override + public Percentile copy() { + return new Percentile(this); + } + + /** + * Copies source to dest. 
+ * @param source Percentile to copy + * @param dest Percentile to copy to + * @exception MathUnsupportedOperationException always thrown since 3.4 + * @deprecated as of 3.4 this method does not work anymore, as it fails to + * copy internal states between instances configured with different + * {@link EstimationType estimation type}, {@link NaNStrategy NaN handling strategies} + * and {@link KthSelector kthSelector}, it therefore always + * throw {@link MathUnsupportedOperationException} + */ + @Deprecated + public static void copy(final Percentile source, final Percentile dest) + throws MathUnsupportedOperationException { + throw new MathUnsupportedOperationException(); + } + + /** + * Get the work array to operate. Makes use of prior {@code storedData} if + * it exists or else do a check on NaNs and copy a subset of the array + * defined by begin and length parameters. The set {@link #nanStrategy} will + * be used to either retain/remove/replace any NaNs present before returning + * the resultant array. 
+ * + * @param values the array of numbers + * @param begin index to start reading the array + * @param length the length of array to be read from the begin index + * @return work array sliced from values in the range [begin,begin+length) + * @throws MathIllegalArgumentException if values or indices are invalid + */ + protected double[] getWorkArray(final double[] values, final int begin, final int length) { + final double[] work; + if (values == getDataRef()) { + work = getDataRef(); + } else { + switch (nanStrategy) { + case MAXIMAL:// Replace NaNs with +INFs + work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY); + break; + case MINIMAL:// Replace NaNs with -INFs + work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY); + break; + case REMOVED:// Drop NaNs from data + work = removeAndSlice(values, begin, length, Double.NaN); + break; + case FAILED:// just throw exception as NaN is un-acceptable + work = copyOf(values, begin, length); + MathArrays.checkNotNaN(work); + break; + default: //FIXED + work = copyOf(values,begin,length); + break; + } + } + return work; + } + + /** + * Make a copy of the array for the slice defined by array part from + * [begin, begin+length) + * @param values the input array + * @param begin start index of the array to include + * @param length number of elements to include from begin + * @return copy of a slice of the original array + */ + private static double[] copyOf(final double[] values, final int begin, final int length) { + MathArrays.verifyValues(values, begin, length); + return MathArrays.copyOfRange(values, begin, begin + length); + } + + /** + * Replace every occurrence of a given value with a replacement value in a + * copied slice of array defined by array part from [begin, begin+length). 
+ * @param values the input array + * @param begin start index of the array to include + * @param length number of elements to include from begin + * @param original the value to be replaced with + * @param replacement the value to be used for replacement + * @return the copy of sliced array with replaced values + */ + private static double[] replaceAndSlice(final double[] values, + final int begin, final int length, + final double original, + final double replacement) { + final double[] temp = copyOf(values, begin, length); + for(int i = 0; i < length; i++) { + temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ? + replacement : temp[i]; + } + return temp; + } + + /** + * Remove the occurrence of a given value in a copied slice of array + * defined by the array part from [begin, begin+length). + * @param values the input array + * @param begin start index of the array to include + * @param length number of elements to include from begin + * @param removedValue the value to be removed from the sliced array + * @return the copy of the sliced array after removing the removedValue + */ + private static double[] removeAndSlice(final double[] values, + final int begin, final int length, + final double removedValue) { + MathArrays.verifyValues(values, begin, length); + final double[] temp; + //BitSet(length) to indicate where the removedValue is located + final BitSet bits = new BitSet(length); + for (int i = begin; i < begin+length; i++) { + if (Precision.equalsIncludingNaN(removedValue, values[i])) { + bits.set(i - begin); + } + } + //Check if empty then create a new copy + if (bits.isEmpty()) { + temp = copyOf(values, begin, length); // Nothing removed, just copy + } else if(bits.cardinality() == length){ + temp = new double[0]; // All removed, just empty + }else { // Some removable, so new + temp = new double[length - bits.cardinality()]; + int start = begin; //start index from source array (i.e values) + int dest = 0; //dest index in destination array(i.e 
temp) + int nextOne = -1; //nextOne is the index of bit set of next one + int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset + while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) { + final int lengthToCopy = nextOne - bitSetPtr; + System.arraycopy(values, start, temp, dest, lengthToCopy); + dest += lengthToCopy; + start = begin + (bitSetPtr = bits.nextClearBit(nextOne)); + } + //Copy any residue past start index till begin+length + if (start < begin + length) { + System.arraycopy(values,start,temp,dest,begin + length - start); + } + } + return temp; + } + + /** + * Get pivots which is either cached or a newly created one + * + * @param values array containing the input numbers + * @return cached pivots or a newly created one + */ + private int[] getPivots(final double[] values) { + final int[] pivotsHeap; + if (values == getDataRef()) { + pivotsHeap = cachedPivots; + } else { + pivotsHeap = new int[PIVOTS_HEAP_LENGTH]; + Arrays.fill(pivotsHeap, -1); + } + return pivotsHeap; + } + + /** + * Get the estimation {@link EstimationType type} used for computation. + * + * @return the {@code estimationType} set + */ + public EstimationType getEstimationType() { + return estimationType; + } + + /** + * Build a new instance similar to the current one except for the + * {@link EstimationType estimation type}. + * <p> + * This method is intended to be used as part of a fluent-type builder + * pattern. Building finely tune instances should be done as follows: + * </p> + * <pre> + * Percentile customized = new Percentile(quantile). + * withEstimationType(estimationType). + * withNaNStrategy(nanStrategy). + * withKthSelector(kthSelector); + * </pre> + * <p> + * If any of the {@code withXxx} method is omitted, the default value for + * the corresponding customization parameter will be used. 
+ * </p> + * @param newEstimationType estimation type for the new instance + * @return a new instance, with changed estimation type + * @throws NullArgumentException when newEstimationType is null + */ + public Percentile withEstimationType(final EstimationType newEstimationType) { + return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector); + } + + /** + * Get the {@link NaNStrategy NaN Handling} strategy used for computation. + * @return {@code NaN Handling} strategy set during construction + */ + public NaNStrategy getNaNStrategy() { + return nanStrategy; + } + + /** + * Build a new instance similar to the current one except for the + * {@link NaNStrategy NaN handling} strategy. + * <p> + * This method is intended to be used as part of a fluent-type builder + * pattern. Building finely tune instances should be done as follows: + * </p> + * <pre> + * Percentile customized = new Percentile(quantile). + * withEstimationType(estimationType). + * withNaNStrategy(nanStrategy). + * withKthSelector(kthSelector); + * </pre> + * <p> + * If any of the {@code withXxx} method is omitted, the default value for + * the corresponding customization parameter will be used. + * </p> + * @param newNaNStrategy NaN strategy for the new instance + * @return a new instance, with changed NaN handling strategy + * @throws NullArgumentException when newNaNStrategy is null + */ + public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) { + return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector); + } + + /** + * Get the {@link KthSelector kthSelector} used for computation. + * @return the {@code kthSelector} set + */ + public KthSelector getKthSelector() { + return kthSelector; + } + + /** + * Get the {@link PivotingStrategyInterface} used in KthSelector for computation. 
+ * @return the pivoting strategy set + */ + public PivotingStrategyInterface getPivotingStrategy() { + return kthSelector.getPivotingStrategy(); + } + + /** + * Build a new instance similar to the current one except for the + * {@link KthSelector kthSelector} instance specifically set. + * <p> + * This method is intended to be used as part of a fluent-type builder + * pattern. Building finely tune instances should be done as follows: + * </p> + * <pre> + * Percentile customized = new Percentile(quantile). + * withEstimationType(estimationType). + * withNaNStrategy(nanStrategy). + * withKthSelector(newKthSelector); + * </pre> + * <p> + * If any of the {@code withXxx} method is omitted, the default value for + * the corresponding customization parameter will be used. + * </p> + * @param newKthSelector KthSelector for the new instance + * @return a new instance, with changed KthSelector + * @throws NullArgumentException when newKthSelector is null + */ + public Percentile withKthSelector(final KthSelector newKthSelector) { + return new Percentile(quantile, estimationType, nanStrategy, + newKthSelector); + } + + /** + * An enum for various estimation strategies of a percentile referred in + * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a> + * with the names of enum matching those of types mentioned in + * wikipedia. 
+ * <p> + * Each enum corresponding to the specific type of estimation in wikipedia + * implements the respective formulae that specializes in the below aspects + * <ul> + * <li>An <b>index method</b> to calculate approximate index of the + * estimate</li> + * <li>An <b>estimate method</b> to estimate a value found at the earlier + * computed index</li> + * <li>A <b> minLimit</b> on the quantile for which first element of sorted + * input is returned as an estimate </li> + * <li>A <b> maxLimit</b> on the quantile for which last element of sorted + * input is returned as an estimate </li> + * </ul> + * <p> + * Users can now create {@link Percentile} by explicitly passing this enum; + * such as by invoking {@link Percentile#withEstimationType(EstimationType)} + * <p> + * References: + * <ol> + * <li> + * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a> + * </li> + * <li> + * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf"> + * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical + * packages, American Statistician 50, 361–365</a> </li> + * <li> + * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html"> + * R-Manual </a></li> + * </ol> + * + */ + public enum EstimationType { + /** + * This is the default type used in the {@link Percentile}.This method + * has the following formulae for index and estimates<br> + * \( \begin{align} + * &index = (N+1)p\ \\ + * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\ + * &minLimit = 0 \\ + * &maxLimit = 1 \\ + * \end{align}\) + */ + LEGACY("Legacy Apache Commons Math") { + /** + * {@inheritDoc}.This method in particular makes use of existing + * Apache Commons Math style of picking up the index. + */ + @Override + protected double index(final double p, final int length) { + final double minLimit = 0d; + final double maxLimit = 1d; + return Double.compare(p, minLimit) == 0 ? 0 : + Double.compare(p, maxLimit) == 0 ? 
+ length : p * (length + 1); + } + }, + /** + * The method R_1 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= Np + 1/2\, \\ + * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\ + * &minLimit = 0 \\ + * \end{align}\) + */ + R_1("R-1") { + + @Override + protected double index(final double p, final int length) { + final double minLimit = 0d; + return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; + } + + /** + * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5) + */ + @Override + protected double estimate(final double[] values, + final int[] pivotsHeap, final double pos, + final int length, final KthSelector selector) { + return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); + } + + }, + /** + * The method R_2 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= Np + 1/2\, \\ + * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} + + * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\ + * &minLimit = 0 \\ + * &maxLimit = 1 \\ + * \end{align}\) + */ + R_2("R-2") { + + @Override + protected double index(final double p, final int length) { + final double minLimit = 0d; + final double maxLimit = 1d; + return Double.compare(p, maxLimit) == 0 ? length : + Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; + } + + /** + * {@inheritDoc}This method in particular for R_2 averages the + * values at ceil(p+0.5) and floor(p-0.5). 
+ */ + @Override + protected double estimate(final double[] values, + final int[] pivotsHeap, final double pos, + final int length, final KthSelector selector) { + final double low = + super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); + final double high = + super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector); + return (low + high) / 2; + } + + }, + /** + * The method R_3 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= Np \\ + * &estimate= x_{\lfloor h \rceil}\, \\ + * &minLimit = 0.5/N \\ + * \end{align}\) + */ + R_3("R-3") { + @Override + protected double index(final double p, final int length) { + final double minLimit = 1d/2 / length; + return Double.compare(p, minLimit) <= 0 ? + 0 : FastMath.rint(length * p); + } + + }, + /** + * The method R_4 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= Np\, \\ + * &estimate= x_{\lfloor h \rfloor} + (h - + * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = 1/N \\ + * &maxLimit = 1 \\ + * \end{align}\) + */ + R_4("R-4") { + @Override + protected double index(final double p, final int length) { + final double minLimit = 1d / length; + final double maxLimit = 1d; + return Double.compare(p, minLimit) < 0 ? 0 : + Double.compare(p, maxLimit) == 0 ? length : length * p; + } + + }, + /** + * The method R_5 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= Np + 1/2\\ + * &estimate= x_{\lfloor h \rfloor} + (h - + * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = 0.5/N \\ + * &maxLimit = (N-0.5)/N + * \end{align}\) + */ + R_5("R-5"){ + + @Override + protected double index(final double p, final int length) { + final double minLimit = 1d/2 / length; + final double maxLimit = (length - 0.5) / length; + return Double.compare(p, minLimit) < 0 ? 
0 : + Double.compare(p, maxLimit) >= 0 ? + length : length * p + 0.5; + } + }, + /** + * The method R_6 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index= (N + 1)p \\ + * &estimate= x_{\lfloor h \rfloor} + (h - + * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = 1/(N+1) \\ + * &maxLimit = N/(N+1) \\ + * \end{align}\) + * <p> + * <b>Note:</b> This method computes the index in a manner very close to + * the existing default Commons Math Percentile implementation. The + * difference to be noted is in the limits at which the first element + * (p<1/(N+1)) and the last element (p>=N/(N+1)) are picked up, + * while in the default case these are done with p=0 and p=1 respectively. + */ + R_6("R-6"){ + + @Override + protected double index(final double p, final int length) { + final double minLimit = 1d / (length + 1); + final double maxLimit = 1d * length / (length + 1); + return Double.compare(p, minLimit) < 0 ? 0 : + Double.compare(p, maxLimit) >= 0 ? + length : (length + 1) * p; + } + }, + + /** + * The method R_7 implements Microsoft Excel style computation and has the + * following formulae for index and estimates.<br> + * \( \begin{align} + * &index = (N-1)p + 1 \\ + * &estimate = x_{\lfloor h \rfloor} + (h - + * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = 0 \\ + * &maxLimit = 1 \\ + * \end{align}\) + */ + R_7("R-7") { + @Override + protected double index(final double p, final int length) { + final double minLimit = 0d; + final double maxLimit = 1d; + return Double.compare(p, minLimit) == 0 ? 0 : + Double.compare(p, maxLimit) == 0 ? 
+ length : 1 + (length - 1) * p; + } + + }, + + /** + * The method R_8 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index = (N + 1/3)p + 1/3 \\ + * &estimate = x_{\lfloor h \rfloor} + (h - + \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = (2/3)/(N+1/3) \\ + * &maxLimit = (N-1/3)/(N+1/3) \\ + * \end{align}\) + * <p> + * As per Ref [2,3] this approach is most recommended as it provides + * an approximate median-unbiased estimate regardless of distribution. + */ + R_8("R-8") { + @Override + protected double index(final double p, final int length) { + final double minLimit = 2 * (1d / 3) / (length + 1d / 3); + final double maxLimit = + (length - 1d / 3) / (length + 1d / 3); + return Double.compare(p, minLimit) < 0 ? 0 : + Double.compare(p, maxLimit) >= 0 ? length : + (length + 1d / 3) * p + 1d / 3; + } + }, + + /** + * The method R_9 has the following formulae for index and estimates<br> + * \( \begin{align} + * &index = (N + 1/4)p + 3/8\\ + * &estimate = x_{\lfloor h \rfloor} + (h - + \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h + * \rfloor}) \\ + * &minLimit = (5/8)/(N+1/4) \\ + * &maxLimit = (N-3/8)/(N+1/4) \\ + * \end{align}\) + */ + R_9("R-9") { + @Override + protected double index(final double p, final int length) { + final double minLimit = 5d/8 / (length + 0.25); + final double maxLimit = (length - 3d/8) / (length + 0.25); + return Double.compare(p, minLimit) < 0 ? 0 : + Double.compare(p, maxLimit) >= 0 ? length : + (length + 0.25) * p + 3d/8; + } + + }, + ; + + /** Simple name such as R-1, R-2 corresponding to those in wikipedia. 
+ */ + private final String name; + + /** + * Constructor + * + * @param type name of estimation type as per wikipedia + */ + EstimationType(final String type) { + this.name = type; + } + + /** + * Finds the index into the array that can be used as the starting index to + * {@link #estimate(double[], int[], double, int, KthSelector) estimate} + * the percentile. The index calculation is specific to each + * {@link EstimationType}. + * + * @param p the p<sup>th</sup> quantile + * @param length the total number of array elements in the work array + * @return a computed real valued index as explained in the wikipedia + */ + protected abstract double index(final double p, final int length); + + /** + * Estimation based on K<sup>th</sup> selection. This may be overridden + * in specific enums to compute slightly different estimations. + * + * @param work array of numbers to be used for finding the percentile + * @param pos positional index previously computed by calling + * {@link #index(double, int)} + * @param pivotsHeap an earlier populated cache if exists; will be used + * @param length size of array considered + * @param selector a {@link KthSelector} used for pivoting during search + * @return estimated percentile: the selection at index 0 when pos is below 1, the selection at index length - 1 when pos is at least length, otherwise the linear interpolation between the selections at indices floor(pos) - 1 and floor(pos) + */ + protected double estimate(final double[] work, final int[] pivotsHeap, + final double pos, final int length, + final KthSelector selector) { + + final double fpos = FastMath.floor(pos); + final int intPos = (int) fpos; + final double dif = pos - fpos; + + if (pos < 1) { + return selector.select(work, pivotsHeap, 0); + } + if (pos >= length) { + return selector.select(work, pivotsHeap, length - 1); + } + + final double lower = selector.select(work, pivotsHeap, intPos - 1); + final double upper = selector.select(work, pivotsHeap, intPos); + return lower + dif * (upper - lower); + } + + /** + * Evaluate method to compute the percentile for a given bounded array + * using earlier computed pivots heap.<br> + * This basically calls the {@link 
#index(double, int) index} and then + * {@link #estimate(double[], int[], double, int, KthSelector) estimate} + * functions to return the estimated percentile value. + * + * @param work array of numbers to be used for finding the percentile + * @param pivotsHeap a prior cached heap which can speed up estimation + * @param p the p<sup>th</sup> quantile to be computed + * @param selector a {@link KthSelector} used for pivoting during search + * @return estimated percentile + * @throws OutOfRangeException if p is out of range + * @throws NullArgumentException if work array is null + */ + protected double evaluate(final double[] work, final int[] pivotsHeap, final double p, + final KthSelector selector) { + MathUtils.checkNotNull(work); + if (p > 100 || p <= 0) { + throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, + p, 0, 100); + } + return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector); + } + + /** + * Evaluate method to compute the percentile for a given bounded array. + * This basically calls the {@link #index(double, int) index} and then + * {@link #estimate(double[], int[], double, int, KthSelector) estimate} + * functions to return the estimated percentile value. Please + * note that this method does not make use of cached pivots. 
+ * + * @param work array of numbers to be used for finding the percentile + * @param p the p<sup>th</sup> quantile to be computed + * @return estimated percentile + * @param selector a {@link KthSelector} used for pivoting during search + * @throws OutOfRangeException if length or p is out of range + * @throws NullArgumentException if work array is null + */ + public double evaluate(final double[] work, final double p, final KthSelector selector) { + return this.evaluate(work, null, p, selector); + } + + /** + * Gets the name of the enum + * + * @return the name + */ + String getName() { + return name; + } + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/rank/package-info.java b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/package-info.java new file mode 100644 index 0000000..da37b37 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/rank/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Summary statistics based on ranks. 
+ */ +package org.apache.commons.math3.stat.descriptive.rank; diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Product.java b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Product.java new file mode 100644 index 0000000..7d313a5 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Product.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.summary; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.WeightedEvaluation; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the product of the available values. + * <p> + * If there are no values in the dataset, then 1 is returned. 
+ * If any of the values are + * <code>NaN</code>, then <code>NaN</code> is returned.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Product extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation { + + /** Serializable version identifier */ + private static final long serialVersionUID = 2824226005990582538L; + + /**The number of values that have been added */ + private long n; + + /** + * The current Running Product. + */ + private double value; + + /** + * Create a Product instance + */ + public Product() { + n = 0; + value = 1; + } + + /** + * Copy constructor, creates a new {@code Product} identical + * to the {@code original} + * + * @param original the {@code Product} instance to copy + * @throws NullArgumentException if original is null + */ + public Product(Product original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + value *= d; + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + value = 1; + n = 0; + } + + /** + * Returns the product of the entries in the specified portion of + * the input array, or 1 if the designated subarray + * is empty. 
+ * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the product of the values or 1 if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + double product = Double.NaN; + if (test(values, begin, length, true)) { + product = 1.0; + for (int i = begin; i < begin + length; i++) { + product *= values[i]; + } + } + return product; + } + + /** + * <p>Returns the weighted product of the entries in the specified portion of + * the input array, or 1 if the designated subarray + * is empty.</p> + * + * <p>Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * + * <p>Uses the formula, <pre> + * weighted product = ∏values[i]<sup>weights[i]</sup> + * </pre> + * that is, the weights are applied as exponents when computing the weighted product.</p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the weighted product of the values or 1 if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double 
evaluate(final double[] values, final double[] weights, + final int begin, final int length) throws MathIllegalArgumentException { + double product = Double.NaN; + if (test(values, weights, begin, length, true)) { + product = 1.0; + for (int i = begin; i < begin + length; i++) { + product *= FastMath.pow(values[i], weights[i]); + } + } + return product; + } + + /** + * <p>Returns the weighted product of the entries in the input array.</p> + * + * <p>Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null (NOTE(review): <code>values.length</code> is read before any validation, so this case appears to surface as a NullPointerException instead; confirm)</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * + * <p>Uses the formula, <pre> + * weighted product = ∏values[i]<sup>weights[i]</sup> + * </pre> + * that is, the weights are applied as exponents when computing the weighted product.</p> + * + * @param values the input array + * @param weights the weights array + * @return the weighted product of the values or 1 if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights) + throws MathIllegalArgumentException { + return evaluate(values, weights, 0, values.length); + } + + + /** + * {@inheritDoc} + */ + @Override + public Product copy() { + Product result = new Product(); + // No try-catch or advertised exception because args are valid + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source Product to copy + * @param dest Product to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Product source, Product dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Sum.java b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Sum.java new file mode 100644 index 0000000..e12b6a1 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/Sum.java @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.stat.descriptive.summary; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + + +/** + * Returns the sum of the available values. 
+ * <p> + * If there are no values in the dataset, then 0 is returned. + * If any of the values are + * <code>NaN</code>, then <code>NaN</code> is returned.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class Sum extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -8231831954703408316L; + + /** */ + private long n; + + /** + * The currently running sum. + */ + private double value; + + /** + * Create a Sum instance + */ + public Sum() { + n = 0; + value = 0; + } + + /** + * Copy constructor, creates a new {@code Sum} identical + * to the {@code original} + * + * @param original the {@code Sum} instance to copy + * @throws NullArgumentException if original is null + */ + public Sum(Sum original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + value += d; + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + value = 0; + n = 0; + } + + /** + * The sum of the entries in the specified portion of + * the input array, or 0 if the designated subarray + * is empty. 
+ * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the sum of the values or 0 if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + double sum = Double.NaN; + if (test(values, begin, length, true)) { + sum = 0.0; + for (int i = begin; i < begin + length; i++) { + sum += values[i]; + } + } + return sum; + } + + /** + * The weighted sum of the entries in the specified portion of + * the input array, or 0 if the designated subarray + * is empty. + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * <li>the start and length arguments do not determine a valid array</li> + * </ul></p> + * <p> + * Uses the formula, <pre> + * weighted sum = Σ(values[i] * weights[i]) + * </pre></p> + * + * @param values the input array + * @param weights the weights array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the sum of the values or 0 if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights, + final int begin, final int length) throws MathIllegalArgumentException { + double sum = Double.NaN; + if 
(test(values, weights, begin, length, true)) { + sum = 0.0; + for (int i = begin; i < begin + length; i++) { + sum += values[i] * weights[i]; + } + } + return sum; + } + + /** + * The weighted sum of the entries in the input array. + * <p> + * Throws <code>MathIllegalArgumentException</code> if any of the following are true: + * <ul><li>the values array is null (NOTE(review): <code>values.length</code> is read before any validation, so this case appears to surface as a NullPointerException instead; confirm)</li> + * <li>the weights array is null</li> + * <li>the weights array does not have the same length as the values array</li> + * <li>the weights array contains one or more infinite values</li> + * <li>the weights array contains one or more NaN values</li> + * <li>the weights array contains negative values</li> + * </ul></p> + * <p> + * Uses the formula, <pre> + * weighted sum = Σ(values[i] * weights[i]) + * </pre></p> + * + * @param values the input array + * @param weights the weights array + * @return the weighted sum of the values or 0 if length = 0 + * @throws MathIllegalArgumentException if the parameters are not valid + * @since 2.1 + */ + public double evaluate(final double[] values, final double[] weights) + throws MathIllegalArgumentException { + return evaluate(values, weights, 0, values.length); + } + + /** + * {@inheritDoc} + */ + @Override + public Sum copy() { + Sum result = new Sum(); + // No try-catch or advertised exception because args are valid + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source Sum to copy + * @param dest Sum to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(Sum source, Sum dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfLogs.java b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfLogs.java new file mode 100644 index 0000000..19718af --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfLogs.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.summary; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the sum of the natural logs for this collection of values. + * <p> + * Uses {@link org.apache.commons.math3.util.FastMath#log(double)} to compute the logs. + * Therefore, + * <ul> + * <li>If any of values are < 0, the result is <code>NaN.</code></li> + * <li>If all values are non-negative and less than + * <code>Double.POSITIVE_INFINITY</code>, but at least one value is 0, the + * result is <code>Double.NEGATIVE_INFINITY.</code></li> + * <li>If both <code>Double.POSITIVE_INFINITY</code> and + * <code>Double.NEGATIVE_INFINITY</code> are among the values, the result is + * <code>NaN.</code></li> + * </ul></p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class SumOfLogs extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = -370076995648386763L; + + /**Number of values that have been added */ + private int n; + + /** + * The currently running value + */ + private double value; + + /** + * Create a SumOfLogs instance + */ + public SumOfLogs() { + value = 0d; + n = 0; + } + + /** + * Copy constructor, creates a new {@code SumOfLogs} identical + * to the {@code original} + * + * @param original the {@code SumOfLogs} instance to copy + * @throws NullArgumentException 
if original is null + */ + public SumOfLogs(SumOfLogs original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + value += FastMath.log(d); + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + value = 0d; + n = 0; + } + + /** + * Returns the sum of the natural logs of the entries in the specified portion of + * the input array, or 0 if the designated subarray + * is empty. + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * <p> + * See {@link SumOfLogs}.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the sum of the natural logs of the values or 0 if + * length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values, final int begin, final int length) + throws MathIllegalArgumentException { + double sumLog = Double.NaN; + if (test(values, begin, length, true)) { + sumLog = 0.0; + for (int i = begin; i < begin + length; i++) { + sumLog += FastMath.log(values[i]); + } + } + return sumLog; + } + + /** + * {@inheritDoc} + */ + @Override + public SumOfLogs copy() { + SumOfLogs result = new SumOfLogs(); + // No try-catch or advertised exception here because args are valid + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source SumOfLogs to copy + * @param dest SumOfLogs to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SumOfLogs source, SumOfLogs dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfSquares.java b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfSquares.java new file mode 100644 index 0000000..161d8c8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/SumOfSquares.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.stat.descriptive.summary; + +import java.io.Serializable; + +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.stat.descriptive.AbstractStorelessUnivariateStatistic; +import org.apache.commons.math3.util.MathUtils; + +/** + * Returns the sum of the squares of the available values. + * <p> + * If there are no values in the dataset, then 0 is returned. + * If any of the values are + * <code>NaN</code>, then <code>NaN</code> is returned.</p> + * <p> + * <strong>Note that this implementation is not synchronized.</strong> If + * multiple threads access an instance of this class concurrently, and at least + * one of the threads invokes the <code>increment()</code> or + * <code>clear()</code> method, it must be synchronized externally.</p> + * + */ +public class SumOfSquares extends AbstractStorelessUnivariateStatistic implements Serializable { + + /** Serializable version identifier */ + private static final long serialVersionUID = 1460986908574398008L; + + /** */ + private long n; + + /** + * The currently running sumSq + */ + private double value; + + /** + * Create a SumOfSquares instance + */ + public SumOfSquares() { + n = 0; + value = 0; + } + + /** + * Copy constructor, creates a new {@code SumOfSquares} identical + * to the {@code original} + * + * @param original the {@code SumOfSquares} instance to copy + * @throws NullArgumentException if original is null + */ + public SumOfSquares(SumOfSquares original) throws NullArgumentException { + copy(original, this); + } + + /** + * {@inheritDoc} + */ + @Override + public void increment(final double d) { + value += d * d; + n++; + } + + /** + * {@inheritDoc} + */ + @Override + public double getResult() { + return value; + } + + /** + * {@inheritDoc} + */ + public long getN() { + return n; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() { + 
value = 0; + n = 0; + } + + /** + * Returns the sum of the squares of the entries in the specified portion of + * the input array, or 0 if the designated subarray + * is empty. + * <p> + * Throws <code>MathIllegalArgumentException</code> if the array is null.</p> + * + * @param values the input array + * @param begin index of the first array element to include + * @param length the number of elements to include + * @return the sum of the squares of the values or 0 if length = 0 + * @throws MathIllegalArgumentException if the array is null or the array index + * parameters are not valid + */ + @Override + public double evaluate(final double[] values,final int begin, final int length) + throws MathIllegalArgumentException { + double sumSq = Double.NaN; + if (test(values, begin, length, true)) { + sumSq = 0.0; + for (int i = begin; i < begin + length; i++) { + sumSq += values[i] * values[i]; + } + } + return sumSq; + } + + /** + * {@inheritDoc} + */ + @Override + public SumOfSquares copy() { + SumOfSquares result = new SumOfSquares(); + // no try-catch or advertised exception here because args are valid + copy(this, result); + return result; + } + + /** + * Copies source to dest. 
+ * <p>Neither source nor dest can be null.</p> + * + * @param source SumOfSquares to copy + * @param dest SumOfSquares to copy to + * @throws NullArgumentException if either source or dest is null + */ + public static void copy(SumOfSquares source, SumOfSquares dest) + throws NullArgumentException { + MathUtils.checkNotNull(source); + MathUtils.checkNotNull(dest); + dest.setData(source.getDataRef()); + dest.n = source.n; + dest.value = source.value; + } + +} diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/summary/package-info.java b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/package-info.java new file mode 100644 index 0000000..2f07145 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/summary/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Other summary statistics. + */ +package org.apache.commons.math3.stat.descriptive.summary; |