diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math3/distribution')
43 files changed, 10631 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/distribution/AbstractIntegerDistribution.java b/src/main/java/org/apache/commons/math3/distribution/AbstractIntegerDistribution.java new file mode 100644 index 0000000..102700e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/AbstractIntegerDistribution.java @@ -0,0 +1,250 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.distribution;

import org.apache.commons.math3.exception.MathInternalError;
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.NumberIsTooLargeException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.util.FastMath;

import java.io.Serializable;

/**
 * Base class for integer-valued discrete distributions. Default implementations are provided for
 * some of the methods that do not vary from distribution to distribution.
 *
 * <p>Instances created through the deprecated no-argument constructor have a {@code null} {@link
 * #random} generator; only the deprecated {@link #randomData} field is usable on such instances.
 */
public abstract class AbstractIntegerDistribution implements IntegerDistribution, Serializable {

    /** Serializable version identifier */
    private static final long serialVersionUID = -1146319659338487221L;

    /**
     * RandomData instance used to generate samples from the distribution.
     *
     * @deprecated As of 3.1, to be removed in 4.0. Please use the {@link #random} instance variable
     *     instead.
     */
    @Deprecated
    protected final org.apache.commons.math3.random.RandomDataImpl randomData =
            new org.apache.commons.math3.random.RandomDataImpl();

    /**
     * RNG instance used to generate samples from the distribution. It is {@code null} when the
     * instance was created through the deprecated no-argument constructor.
     *
     * @since 3.1
     */
    protected final RandomGenerator random;

    /**
     * @deprecated As of 3.1, to be removed in 4.0. Please use {@link
     *     #AbstractIntegerDistribution(RandomGenerator)} instead.
     */
    @Deprecated
    protected AbstractIntegerDistribution() {
        // Legacy users are only allowed to access the deprecated "randomData".
        // New users are forbidden to use this constructor.
        random = null;
    }

    /**
     * @param rng Random number generator.
     * @since 3.1
     */
    protected AbstractIntegerDistribution(RandomGenerator rng) {
        random = rng;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The default implementation uses the identity
     *
     * <p>{@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)}
     *
     * @throws NumberIsTooLargeException if {@code x0 > x1}
     */
    public double cumulativeProbability(int x0, int x1) throws NumberIsTooLargeException {
        if (x1 < x0) {
            throw new NumberIsTooLargeException(
                    LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT, x0, x1, true);
        }
        return cumulativeProbability(x1) - cumulativeProbability(x0);
    }

    /**
     * {@inheritDoc}
     *
     * <p>The default implementation returns
     *
     * <ul>
     *   <li>{@link #getSupportLowerBound()} for {@code p = 0},
     *   <li>{@link #getSupportUpperBound()} for {@code p = 1}, and
     *   <li>{@link #solveInverseCumulativeProbability(double, int, int)} for {@code 0 < p < 1}.
     * </ul>
     *
     * @throws OutOfRangeException if {@code p < 0} or {@code p > 1}
     */
    public int inverseCumulativeProbability(final double p) throws OutOfRangeException {
        if (p < 0.0 || p > 1.0) {
            throw new OutOfRangeException(p, 0, 1);
        }

        int lower = getSupportLowerBound();
        if (p == 0.0) {
            return lower;
        }
        if (lower == Integer.MIN_VALUE) {
            // Cannot step below Integer.MIN_VALUE, so test the bound itself.
            if (checkedCumulativeProbability(lower) >= p) {
                return lower;
            }
        } else {
            lower -= 1; // this ensures cumulativeProbability(lower) < p, which
            // is important for the solving step
        }

        int upper = getSupportUpperBound();
        if (p == 1.0) {
            return upper;
        }

        // use the one-sided Chebyshev inequality to narrow the bracket
        // cf. AbstractRealDistribution.inverseCumulativeProbability(double)
        final double mu = getNumericalMean();
        final double sigma = FastMath.sqrt(getNumericalVariance());
        final boolean chebyshevApplies =
                !(Double.isInfinite(mu)
                        || Double.isNaN(mu)
                        || Double.isInfinite(sigma)
                        || Double.isNaN(sigma)
                        || sigma == 0.0);
        if (chebyshevApplies) {
            double k = FastMath.sqrt((1.0 - p) / p);
            double tmp = mu - k * sigma;
            if (tmp > lower) {
                lower = ((int) FastMath.ceil(tmp)) - 1;
            }
            k = 1.0 / k;
            tmp = mu + k * sigma;
            if (tmp < upper) {
                upper = ((int) FastMath.ceil(tmp)) - 1;
            }
        }

        return solveInverseCumulativeProbability(p, lower, upper);
    }

    /**
     * This is a utility function used by {@link #inverseCumulativeProbability(double)}. It assumes
     * {@code 0 < p < 1} and that the inverse cumulative probability lies in the bracket {@code
     * (lower, upper]}. The implementation does simple bisection to find the smallest {@code
     * p}-quantile <code>inf{x in Z | P(X<=x) >= p}</code>.
     *
     * @param p the cumulative probability
     * @param lower a value satisfying {@code cumulativeProbability(lower) < p}
     * @param upper a value satisfying {@code p <= cumulativeProbability(upper)}
     * @return the smallest {@code p}-quantile of this distribution
     */
    protected int solveInverseCumulativeProbability(final double p, int lower, int upper) {
        while (lower + 1 < upper) {
            int xm = (lower + upper) / 2;
            if (xm < lower || xm > upper) {
                /*
                 * Overflow.
                 * There will never be an overflow in both calculation methods
                 * for xm at the same time
                 */
                xm = lower + (upper - lower) / 2;
            }

            // Keep the invariant cdf(lower) < p <= cdf(upper) while halving the bracket.
            double pm = checkedCumulativeProbability(xm);
            if (pm >= p) {
                upper = xm;
            } else {
                lower = xm;
            }
        }
        return upper;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The deprecated {@link #randomData} is always reseeded. The {@link #random} generator is
     * reseeded only when it is not {@code null}; it is {@code null} for instances created through
     * the deprecated no-argument constructor.
     */
    public void reseedRandomGenerator(long seed) {
        // "random" is null for legacy instances; guard so that "randomData" still gets reseeded
        // instead of failing with a NullPointerException.
        if (random != null) {
            random.setSeed(seed);
        }
        randomData.reSeed(seed);
    }

    /**
     * {@inheritDoc}
     *
     * <p>The default implementation uses the <a
     * href="http://en.wikipedia.org/wiki/Inverse_transform_sampling">inversion method</a>. It
     * draws from {@link #random}, so it requires a non-{@code null} generator.
     */
    public int sample() {
        return inverseCumulativeProbability(random.nextDouble());
    }

    /**
     * {@inheritDoc}
     *
     * <p>The default implementation generates the sample by calling {@link #sample()} in a loop.
     *
     * @throws NotStrictlyPositiveException if {@code sampleSize} is not positive
     */
    public int[] sample(int sampleSize) {
        if (sampleSize <= 0) {
            throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize);
        }
        int[] out = new int[sampleSize];
        for (int i = 0; i < sampleSize; i++) {
            out[i] = sample();
        }
        return out;
    }

    /**
     * Computes the cumulative probability function and checks for {@code NaN} values returned.
     * Any exception encountered while evaluating the cumulative probability function propagates
     * unchanged.
     *
     * @param argument input value
     * @return the cumulative probability
     * @throws MathInternalError if the cumulative probability is {@code NaN}
     */
    private double checkedCumulativeProbability(int argument) throws MathInternalError {
        final double result = cumulativeProbability(argument);
        if (Double.isNaN(result)) {
            throw new MathInternalError(
                    LocalizedFormats.DISCRETE_CUMULATIVE_PROBABILITY_RETURNED_NAN, argument);
        }
        return result;
    }

    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code log(P(X = x))}, where {@code log} is the natural logarithm. In
     * other words, this method represents the logarithm of the probability mass function (PMF) for
     * the distribution. Note that due to the floating point precision and under/overflow issues,
     * this method will for some distributions be more precise and faster than computing the
     * logarithm of {@link #probability(int)}.
     *
     * <p>The default implementation simply computes the logarithm of {@code probability(x)}.
     *
     * @param x the point at which the PMF is evaluated
     * @return the logarithm of the value of the probability mass function at {@code x}
     */
    public double logProbability(int x) {
        return FastMath.log(probability(x));
    }
}
diff --git a/src/main/java/org/apache/commons/math3/distribution/AbstractMultivariateRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/AbstractMultivariateRealDistribution.java new file mode 100644 index 0000000..98e9348 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/AbstractMultivariateRealDistribution.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; + +/** + * Base class for multivariate probability distributions. + * + * @since 3.1 + */ +public abstract class AbstractMultivariateRealDistribution implements MultivariateRealDistribution { + /** RNG instance used to generate samples from the distribution. */ + protected final RandomGenerator random; + + /** The number of dimensions or columns in the multivariate distribution. */ + private final int dimension; + + /** + * @param rng Random number generator. + * @param n Number of dimensions. 
+ */ + protected AbstractMultivariateRealDistribution(RandomGenerator rng, int n) { + random = rng; + dimension = n; + } + + /** {@inheritDoc} */ + public void reseedRandomGenerator(long seed) { + random.setSeed(seed); + } + + /** {@inheritDoc} */ + public int getDimension() { + return dimension; + } + + /** {@inheritDoc} */ + public abstract double[] sample(); + + /** {@inheritDoc} */ + public double[][] sample(final int sampleSize) { + if (sampleSize <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize); + } + final double[][] out = new double[sampleSize][dimension]; + for (int i = 0; i < sampleSize; i++) { + out[i] = sample(); + } + return out; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/AbstractRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/AbstractRealDistribution.java new file mode 100644 index 0000000..b9e5bca --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/AbstractRealDistribution.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.util.FastMath; + +import java.io.Serializable; + +/** + * Base class for probability distributions on the reals. Default implementations are provided for + * some of the methods that do not vary from distribution to distribution. + * + * @since 3.0 + */ +public abstract class AbstractRealDistribution implements RealDistribution, Serializable { + /** Default accuracy. */ + public static final double SOLVER_DEFAULT_ABSOLUTE_ACCURACY = 1e-6; + + /** Serializable version identifier */ + private static final long serialVersionUID = -38038050983108802L; + + /** + * RandomData instance used to generate samples from the distribution. + * + * @deprecated As of 3.1, to be removed in 4.0. Please use the {@link #random} instance variable + * instead. + */ + @Deprecated + protected org.apache.commons.math3.random.RandomDataImpl randomData = + new org.apache.commons.math3.random.RandomDataImpl(); + + /** + * RNG instance used to generate samples from the distribution. + * + * @since 3.1 + */ + protected final RandomGenerator random; + + /** Solver absolute accuracy for inverse cumulative computation */ + private double solverAbsoluteAccuracy = SOLVER_DEFAULT_ABSOLUTE_ACCURACY; + + /** + * @deprecated As of 3.1, to be removed in 4.0. Please use {@link + * #AbstractRealDistribution(RandomGenerator)} instead. 
+ */ + @Deprecated + protected AbstractRealDistribution() { + // Legacy users are only allowed to access the deprecated "randomData". + // New users are forbidden to use this constructor. + random = null; + } + + /** + * @param rng Random number generator. + * @since 3.1 + */ + protected AbstractRealDistribution(RandomGenerator rng) { + random = rng; + } + + /** + * {@inheritDoc} + * + * <p>The default implementation uses the identity + * + * <p>{@code P(x0 < X <= x1) = P(X <= x1) - P(X <= x0)} + * + * @deprecated As of 3.1 (to be removed in 4.0). Please use {@link #probability(double,double)} + * instead. + */ + @Deprecated + public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException { + return probability(x0, x1); + } + + /** + * For a random variable {@code X} whose values are distributed according to this distribution, + * this method returns {@code P(x0 < X <= x1)}. + * + * @param x0 Lower bound (excluded). + * @param x1 Upper bound (included). + * @return the probability that a random variable with this distribution takes a value between + * {@code x0} and {@code x1}, excluding the lower and including the upper endpoint. + * @throws NumberIsTooLargeException if {@code x0 > x1}. + * <p>The default implementation uses the identity {@code P(x0 < X <= x1) = P(X <= x1) - P(X + * <= x0)} + * @since 3.1 + */ + public double probability(double x0, double x1) { + if (x0 > x1) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT, x0, x1, true); + } + return cumulativeProbability(x1) - cumulativeProbability(x0); + } + + /** + * {@inheritDoc} + * + * <p>The default implementation returns + * + * <ul> + * <li>{@link #getSupportLowerBound()} for {@code p = 0}, + * <li>{@link #getSupportUpperBound()} for {@code p = 1}. 
+ * </ul> + */ + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + /* + * IMPLEMENTATION NOTES + * -------------------- + * Where applicable, use is made of the one-sided Chebyshev inequality + * to bracket the root. This inequality states that + * P(X - mu >= k * sig) <= 1 / (1 + k^2), + * mu: mean, sig: standard deviation. Equivalently + * 1 - P(X < mu + k * sig) <= 1 / (1 + k^2), + * F(mu + k * sig) >= k^2 / (1 + k^2). + * + * For k = sqrt(p / (1 - p)), we find + * F(mu + k * sig) >= p, + * and (mu + k * sig) is an upper-bound for the root. + * + * Then, introducing Y = -X, mean(Y) = -mu, sd(Y) = sig, and + * P(Y >= -mu + k * sig) <= 1 / (1 + k^2), + * P(-X >= -mu + k * sig) <= 1 / (1 + k^2), + * P(X <= mu - k * sig) <= 1 / (1 + k^2), + * F(mu - k * sig) <= 1 / (1 + k^2). + * + * For k = sqrt((1 - p) / p), we find + * F(mu - k * sig) <= p, + * and (mu - k * sig) is a lower-bound for the root. + * + * In cases where the Chebyshev inequality does not apply, geometric + * progressions 1, 2, 4, ... and -1, -2, -4, ... are used to bracket + * the root. + */ + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + + double lowerBound = getSupportLowerBound(); + if (p == 0.0) { + return lowerBound; + } + + double upperBound = getSupportUpperBound(); + if (p == 1.0) { + return upperBound; + } + + final double mu = getNumericalMean(); + final double sig = FastMath.sqrt(getNumericalVariance()); + final boolean chebyshevApplies; + chebyshevApplies = + !(Double.isInfinite(mu) + || Double.isNaN(mu) + || Double.isInfinite(sig) + || Double.isNaN(sig)); + + if (lowerBound == Double.NEGATIVE_INFINITY) { + if (chebyshevApplies) { + lowerBound = mu - sig * FastMath.sqrt((1. - p) / p); + } else { + lowerBound = -1.0; + while (cumulativeProbability(lowerBound) >= p) { + lowerBound *= 2.0; + } + } + } + + if (upperBound == Double.POSITIVE_INFINITY) { + if (chebyshevApplies) { + upperBound = mu + sig * FastMath.sqrt(p / (1. 
- p)); + } else { + upperBound = 1.0; + while (cumulativeProbability(upperBound) < p) { + upperBound *= 2.0; + } + } + } + + final UnivariateFunction toSolve = + new UnivariateFunction() { + /** {@inheritDoc} */ + public double value(final double x) { + return cumulativeProbability(x) - p; + } + }; + + double x = + UnivariateSolverUtils.solve( + toSolve, lowerBound, upperBound, getSolverAbsoluteAccuracy()); + + if (!isSupportConnected()) { + /* Test for plateau. */ + final double dx = getSolverAbsoluteAccuracy(); + if (x - dx >= getSupportLowerBound()) { + double px = cumulativeProbability(x); + if (cumulativeProbability(x - dx) == px) { + upperBound = x; + while (upperBound - lowerBound > dx) { + final double midPoint = 0.5 * (lowerBound + upperBound); + if (cumulativeProbability(midPoint) < px) { + lowerBound = midPoint; + } else { + upperBound = midPoint; + } + } + return upperBound; + } + } + } + return x; + } + + /** + * Returns the solver absolute accuracy for inverse cumulative computation. You can override + * this method in order to use a Brent solver with an absolute accuracy different from the + * default. + * + * @return the maximum absolute error in inverse cumulative probability estimates + */ + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** {@inheritDoc} */ + public void reseedRandomGenerator(long seed) { + random.setSeed(seed); + randomData.reSeed(seed); + } + + /** + * {@inheritDoc} + * + * <p>The default implementation uses the <a + * href="http://en.wikipedia.org/wiki/Inverse_transform_sampling">inversion method. </a> + */ + public double sample() { + return inverseCumulativeProbability(random.nextDouble()); + } + + /** + * {@inheritDoc} + * + * <p>The default implementation generates the sample by calling {@link #sample()} in a loop. 
+ */ + public double[] sample(int sampleSize) { + if (sampleSize <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize); + } + double[] out = new double[sampleSize]; + for (int i = 0; i < sampleSize; i++) { + out[i] = sample(); + } + return out; + } + + /** + * {@inheritDoc} + * + * @return zero. + * @since 3.1 + */ + public double probability(double x) { + return 0d; + } + + /** + * Returns the natural logarithm of the probability density function (PDF) of this distribution + * evaluated at the specified point {@code x}. In general, the PDF is the derivative of the + * {@link #cumulativeProbability(double) CDF}. If the derivative does not exist at {@code x}, + * then an appropriate replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY}, + * {@code Double.NaN}, or the limit inferior or limit superior of the difference quotient. Note + * that due to the floating point precision and under/overflow issues, this method will for some + * distributions be more precise and faster than computing the logarithm of {@link + * #density(double)}. The default implementation simply computes the logarithm of {@code + * density(x)}. + * + * @param x the point at which the PDF is evaluated + * @return the logarithm of the value of the probability density function at point {@code x} + */ + public double logDensity(double x) { + return FastMath.log(density(x)); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java new file mode 100644 index 0000000..c7c2663 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java @@ -0,0 +1,417 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.commons.math3.distribution;

import org.apache.commons.math3.exception.NumberIsTooSmallException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.commons.math3.special.Beta;
import org.apache.commons.math3.special.Gamma;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;

/**
 * Implements the Beta distribution.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Beta_distribution">Beta distribution</a>
 * @since 2.0 (changed to concrete class in 3.0)
 */
public class BetaDistribution extends AbstractRealDistribution {
    /**
     * Default inverse cumulative probability accuracy.
     *
     * @since 2.1
     */
    public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;

    /** Serializable version identifier. */
    private static final long serialVersionUID = -1221965979403477668L;

    /** First shape parameter. */
    private final double alpha;

    /** Second shape parameter. */
    private final double beta;

    /**
     * Logarithm of the normalizing factor used in density computations, i.e. {@code
     * logGamma(alpha) + logGamma(beta) - logGamma(alpha + beta)}. It is set to {@code NaN} by the
     * constructor and computed lazily by {@link #recomputeZ()} on first use.
     */
    private double z;

    /** Inverse cumulative probability accuracy. */
    private final double solverAbsoluteAccuracy;

    /**
     * Build a new instance.
     *
     * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as
     * random generator to be used for sampling only (see {@link #sample()} and {@link
     * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to
     * pass {@code null} as random generator via the appropriate constructors to avoid the
     * additional initialisation overhead.
     *
     * @param alpha First shape parameter (must be positive).
     * @param beta Second shape parameter (must be positive).
     */
    public BetaDistribution(double alpha, double beta) {
        this(alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
    }

    /**
     * Build a new instance.
     *
     * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as
     * random generator to be used for sampling only (see {@link #sample()} and {@link
     * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to
     * pass {@code null} as random generator via the appropriate constructors to avoid the
     * additional initialisation overhead.
     *
     * @param alpha First shape parameter (must be positive).
     * @param beta Second shape parameter (must be positive).
     * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates
     *     (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
     * @since 2.1
     */
    public BetaDistribution(double alpha, double beta, double inverseCumAccuracy) {
        this(new Well19937c(), alpha, beta, inverseCumAccuracy);
    }

    /**
     * Creates a β distribution.
     *
     * @param rng Random number generator.
     * @param alpha First shape parameter (must be positive).
     * @param beta Second shape parameter (must be positive).
     * @since 3.3
     */
    public BetaDistribution(RandomGenerator rng, double alpha, double beta) {
        this(rng, alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
    }

    /**
     * Creates a β distribution.
     *
     * <p>NOTE(review): the shape parameters are documented as "must be positive" but this
     * constructor stores them without checking.
     *
     * @param rng Random number generator.
     * @param alpha First shape parameter (must be positive).
     * @param beta Second shape parameter (must be positive).
     * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates
     *     (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
     * @since 3.1
     */
    public BetaDistribution(
            RandomGenerator rng, double alpha, double beta, double inverseCumAccuracy) {
        super(rng);

        this.alpha = alpha;
        this.beta = beta;
        // NaN marks the normalization factor as "not computed yet"; see recomputeZ().
        z = Double.NaN;
        solverAbsoluteAccuracy = inverseCumAccuracy;
    }

    /**
     * Access the first shape parameter, {@code alpha}.
     *
     * @return the first shape parameter.
     */
    public double getAlpha() {
        return alpha;
    }

    /**
     * Access the second shape parameter, {@code beta}.
     *
     * @return the second shape parameter.
     */
    public double getBeta() {
        return beta;
    }

    /** Computes the normalization factor {@code z} if it has not been computed yet. */
    private void recomputeZ() {
        if (Double.isNaN(z)) {
            z = Gamma.logGamma(alpha) + Gamma.logGamma(beta) - Gamma.logGamma(alpha + beta);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Computed as the exponential of {@link #logDensity(double)}, with a log-density of {@code
     * Double.NEGATIVE_INFINITY} mapped to {@code 0}. Exceptions thrown by {@link
     * #logDensity(double)} propagate.
     */
    public double density(double x) {
        final double logDensity = logDensity(x);
        return logDensity == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logDensity);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns {@code Double.NEGATIVE_INFINITY} for {@code x} outside {@code [0, 1]}, and also
     * at {@code x = 0} and {@code x = 1} when no exception is thrown.
     *
     * @throws NumberIsTooSmallException if {@code x == 0} and {@code alpha < 1}, or if {@code x ==
     *     1} and {@code beta < 1}
     */
    @Override
    public double logDensity(double x) {
        recomputeZ();
        if (x < 0 || x > 1) {
            return Double.NEGATIVE_INFINITY;
        } else if (x == 0) {
            if (alpha < 1) {
                throw new NumberIsTooSmallException(
                        LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_0_FOR_SOME_ALPHA,
                        alpha,
                        1,
                        false);
            }
            return Double.NEGATIVE_INFINITY;
        } else if (x == 1) {
            if (beta < 1) {
                throw new NumberIsTooSmallException(
                        LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_1_FOR_SOME_BETA,
                        beta,
                        1,
                        false);
            }
            return Double.NEGATIVE_INFINITY;
        } else {
            // log1p(-x) evaluates log(1 - x).
            double logX = FastMath.log(x);
            double log1mX = FastMath.log1p(-x);
            return (alpha - 1) * logX + (beta - 1) * log1mX - z;
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns {@code 0} for {@code x <= 0}, {@code 1} for {@code x >= 1}, and the regularized
     * beta function {@code Beta.regularizedBeta(x, alpha, beta)} otherwise.
     */
    public double cumulativeProbability(double x) {
        if (x <= 0) {
            return 0;
        } else if (x >= 1) {
            return 1;
        } else {
            return Beta.regularizedBeta(x, alpha, beta);
        }
    }

    /**
     * Return the absolute accuracy setting of the solver used to estimate inverse cumulative
     * probabilities.
     *
     * @return the solver absolute accuracy.
     * @since 2.1
     */
    @Override
    protected double getSolverAbsoluteAccuracy() {
        return solverAbsoluteAccuracy;
    }

    /**
     * {@inheritDoc}
     *
     * <p>For first shape parameter {@code alpha} and second shape parameter {@code beta}, the mean
     * is {@code alpha / (alpha + beta)}.
     */
    public double getNumericalMean() {
        final double a = getAlpha();
        return a / (a + getBeta());
    }

    /**
     * {@inheritDoc}
     *
     * <p>For first shape parameter {@code alpha} and second shape parameter {@code beta}, the
     * variance is {@code (alpha * beta) / [(alpha + beta)^2 * (alpha + beta + 1)]}.
     */
    public double getNumericalVariance() {
        final double a = getAlpha();
        final double b = getBeta();
        final double alphabetasum = a + b;
        return (a * b) / ((alphabetasum * alphabetasum) * (alphabetasum + 1));
    }

    /**
     * {@inheritDoc}
     *
     * <p>The lower bound of the support is always 0 no matter the parameters.
     *
     * @return lower bound of the support (always 0)
     */
    public double getSupportLowerBound() {
        return 0;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The upper bound of the support is always 1 no matter the parameters.
     *
     * @return upper bound of the support (always 1)
     */
    public double getSupportUpperBound() {
        return 1;
    }

    /** {@inheritDoc} */
    public boolean isSupportLowerBoundInclusive() {
        return false;
    }

    /** {@inheritDoc} */
    public boolean isSupportUpperBoundInclusive() {
        return false;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The support of this distribution is connected.
     *
     * @return {@code true}
     */
    public boolean isSupportConnected() {
        return true;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Sampling is performed using Cheng algorithms:
     *
     * <p>R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.".
     * Communications of the ACM, 21, 317–322, 1978.
     */
    @Override
    public double sample() {
        return ChengBetaSampler.sample(random, alpha, beta);
    }

    /**
     * Utility class implementing Cheng's algorithms for beta distribution sampling.
     *
     * <p>R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.".
     * Communications of the ACM, 21, 317–322, 1978.
     *
     * @since 3.6
     */
    private static final class ChengBetaSampler {

        /**
         * Returns one sample using Cheng's sampling algorithm. Algorithm BB is used when {@code
         * min(alpha, beta) > 1}; algorithm BC is used otherwise.
         *
         * @param random random generator to use
         * @param alpha distribution first shape parameter
         * @param beta distribution second shape parameter
         * @return sampled value
         */
        static double sample(RandomGenerator random, final double alpha, final double beta) {
            final double a = FastMath.min(alpha, beta);
            final double b = FastMath.max(alpha, beta);

            if (a > 1) {
                return algorithmBB(random, alpha, a, b);
            } else {
                // Note the swapped order: BC takes the larger parameter first, then the smaller.
                return algorithmBC(random, alpha, b, a);
            }
        }

        /**
         * Returns one sample using Cheng's BB algorithm, when both α and β are greater
         * than 1.
         *
         * <p>Each iteration of the rejection loop draws two uniform deviates from {@code random}.
         *
         * @param random random generator to use
         * @param a0 distribution first shape parameter (α)
         * @param a min(α, β) where α, β are the two distribution shape
         *     parameters
         * @param b max(α, β) where α, β are the two distribution shape
         *     parameters
         * @return sampled value
         */
        private static double algorithmBB(
                RandomGenerator random, final double a0, final double a, final double b) {
            // These locals shadow the distribution's parameter names; here alpha is the sum
            // of the two shape parameters.
            final double alpha = a + b;
            final double beta = FastMath.sqrt((alpha - 2.) / (2. * a * b - alpha));
            final double gamma = a + 1. / beta;

            double r;
            double w;
            double t;
            do {
                final double u1 = random.nextDouble();
                final double u2 = random.nextDouble();
                final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
                w = a * FastMath.exp(v);
                final double z = u1 * u1 * u2;
                // 1.3862944 is approximately ln(4).
                r = gamma * v - 1.3862944;
                final double s = a + r - w;
                // 2.609438 is approximately 1 + ln(5).
                if (s + 2.609438 >= 5 * z) {
                    break;
                }

                t = FastMath.log(z);
                if (s >= t) {
                    break;
                }
            } while (r + alpha * (FastMath.log(alpha) - FastMath.log(b + w)) < t);

            // Clamp an infinite w to the largest finite double before forming the ratio.
            w = FastMath.min(w, Double.MAX_VALUE);
            // Which ratio is returned depends on whether "a" matches the first shape parameter.
            return Precision.equals(a, a0) ? w / (b + w) : b / (b + w);
        }

        /**
         * Returns one sample using Cheng's BC algorithm, when at least one of α and β is
         * smaller than 1.
         *
         * <p>Each iteration of the rejection loop draws two uniform deviates from {@code random}.
         *
         * @param random random generator to use
         * @param a0 distribution first shape parameter (α)
         * @param a max(α, β) where α, β are the two distribution shape
         *     parameters
         * @param b min(α, β) where α, β are the two distribution shape
         *     parameters
         * @return sampled value
         */
        private static double algorithmBC(
                RandomGenerator random, final double a0, final double a, final double b) {
            // These locals shadow the distribution's parameter names; here alpha is the sum
            // of the two shape parameters.
            final double alpha = a + b;
            final double beta = 1. / b;
            final double delta = 1. + a - b;
            // 0.0138889, 0.0416667 and 0.777778 are approximately 1/72, 1/24 and 7/9.
            final double k1 = delta * (0.0138889 + 0.0416667 * b) / (a * beta - 0.777778);
            final double k2 = 0.25 + (0.5 + 0.25 / delta) * b;

            double w;
            for (; ; ) {
                final double u1 = random.nextDouble();
                final double u2 = random.nextDouble();
                final double y = u1 * u2;
                final double z = u1 * y;
                if (u1 < 0.5) {
                    if (0.25 * u2 + z - y >= k1) {
                        continue;
                    }
                } else {
                    if (z <= 0.25) {
                        // Accepted without evaluating the final logarithmic test.
                        final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
                        w = a * FastMath.exp(v);
                        break;
                    }

                    if (z >= k2) {
                        continue;
                    }
                }

                final double v = beta * (FastMath.log(u1) - FastMath.log1p(-u1));
                w = a * FastMath.exp(v);
                // 1.3862944 is approximately ln(4).
                if (alpha * (FastMath.log(alpha) - FastMath.log(b + w) + v) - 1.3862944
                        >= FastMath.log(z)) {
                    break;
                }
            }

            // Clamp an infinite w to the largest finite double before forming the ratio.
            w = FastMath.min(w, Double.MAX_VALUE);
            // Which ratio is returned depends on whether "a" matches the first shape parameter.
            return Precision.equals(a, a0) ? w / (b + w) : b / (b + w);
        }
    }
}
diff --git a/src/main/java/org/apache/commons/math3/distribution/BinomialDistribution.java b/src/main/java/org/apache/commons/math3/distribution/BinomialDistribution.java new file mode 100644 index 0000000..611666a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/BinomialDistribution.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Beta; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the binomial distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Binomial_distribution">Binomial distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/BinomialDistribution.html">Binomial Distribution + * (MathWorld)</a> + */ +public class BinomialDistribution extends AbstractIntegerDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = 6751309484392813623L; + + /** The number of trials. */ + private final int numberOfTrials; + + /** The probability of success. */ + private final double probabilityOfSuccess; + + /** + * Create a binomial distribution with the given number of trials and probability of success. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). 
In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param trials Number of trials. + * @param p Probability of success. + * @throws NotPositiveException if {@code trials < 0}. + * @throws OutOfRangeException if {@code p < 0} or {@code p > 1}. + */ + public BinomialDistribution(int trials, double p) { + this(new Well19937c(), trials, p); + } + + /** + * Creates a binomial distribution. + * + * @param rng Random number generator. + * @param trials Number of trials. + * @param p Probability of success. + * @throws NotPositiveException if {@code trials < 0}. + * @throws OutOfRangeException if {@code p < 0} or {@code p > 1}. + * @since 3.1 + */ + public BinomialDistribution(RandomGenerator rng, int trials, double p) { + super(rng); + + if (trials < 0) { + throw new NotPositiveException(LocalizedFormats.NUMBER_OF_TRIALS, trials); + } + if (p < 0 || p > 1) { + throw new OutOfRangeException(p, 0, 1); + } + + probabilityOfSuccess = p; + numberOfTrials = trials; + } + + /** + * Access the number of trials for this distribution. + * + * @return the number of trials. + */ + public int getNumberOfTrials() { + return numberOfTrials; + } + + /** + * Access the probability of success for this distribution. + * + * @return the probability of success. + */ + public double getProbabilityOfSuccess() { + return probabilityOfSuccess; + } + + /** {@inheritDoc} */ + public double probability(int x) { + final double logProbability = logProbability(x); + return logProbability == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logProbability); + } + + /** {@inheritDoc} * */ + @Override + public double logProbability(int x) { + if (numberOfTrials == 0) { + return (x == 0) ? 0. 
: Double.NEGATIVE_INFINITY; + } + double ret; + if (x < 0 || x > numberOfTrials) { + ret = Double.NEGATIVE_INFINITY; + } else { + ret = + SaddlePointExpansion.logBinomialProbability( + x, numberOfTrials, probabilityOfSuccess, 1.0 - probabilityOfSuccess); + } + return ret; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + double ret; + if (x < 0) { + ret = 0.0; + } else if (x >= numberOfTrials) { + ret = 1.0; + } else { + ret = 1.0 - Beta.regularizedBeta(probabilityOfSuccess, x + 1.0, numberOfTrials - x); + } + return ret; + } + + /** + * {@inheritDoc} + * + * <p>For {@code n} trials and probability parameter {@code p}, the mean is {@code n * p}. + */ + public double getNumericalMean() { + return numberOfTrials * probabilityOfSuccess; + } + + /** + * {@inheritDoc} + * + * <p>For {@code n} trials and probability parameter {@code p}, the variance is {@code n * p * + * (1 - p)}. + */ + public double getNumericalVariance() { + final double p = probabilityOfSuccess; + return numberOfTrials * p * (1 - p); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 except for the probability parameter {@code p = + * 1}. + * + * @return lower bound of the support (0 or the number of trials) + */ + public int getSupportLowerBound() { + return probabilityOfSuccess < 1.0 ? 0 : numberOfTrials; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is the number of trials except for the probability + * parameter {@code p = 0}. + * + * @return upper bound of the support (number of trials or 0) + */ + public int getSupportUpperBound() { + return probabilityOfSuccess > 0.0 ? numberOfTrials : 0; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/CauchyDistribution.java b/src/main/java/org/apache/commons/math3/distribution/CauchyDistribution.java new file mode 100644 index 0000000..8c235ea --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/CauchyDistribution.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Cauchy distribution. 
+ * + * @see <a href="http://en.wikipedia.org/wiki/Cauchy_distribution">Cauchy distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/CauchyDistribution.html">Cauchy Distribution + * (MathWorld)</a> + * @since 1.1 (changed to concrete class in 3.0) + */ +public class CauchyDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier */ + private static final long serialVersionUID = 8589540077390120676L; + + /** The median of this distribution. */ + private final double median; + + /** The scale of this distribution. */ + private final double scale; + + /** Inverse cumulative probability accuracy */ + private final double solverAbsoluteAccuracy; + + /** Creates a Cauchy distribution with the median equal to zero and scale equal to one. */ + public CauchyDistribution() { + this(0, 1); + } + + /** + * Creates a Cauchy distribution using the given median and scale. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param median Median for this distribution. + * @param scale Scale parameter for this distribution. + */ + public CauchyDistribution(double median, double scale) { + this(median, scale, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a Cauchy distribution using the given median and scale. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param median Median for this distribution. + * @param scale Scale parameter for this distribution. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code scale <= 0}. + * @since 2.1 + */ + public CauchyDistribution(double median, double scale, double inverseCumAccuracy) { + this(new Well19937c(), median, scale, inverseCumAccuracy); + } + + /** + * Creates a Cauchy distribution. + * + * @param rng Random number generator. + * @param median Median for this distribution. + * @param scale Scale parameter for this distribution. + * @throws NotStrictlyPositiveException if {@code scale <= 0}. + * @since 3.3 + */ + public CauchyDistribution(RandomGenerator rng, double median, double scale) { + this(rng, median, scale, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a Cauchy distribution. + * + * @param rng Random number generator. + * @param median Median for this distribution. + * @param scale Scale parameter for this distribution. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code scale <= 0}. 
+ * @since 3.1 + */ + public CauchyDistribution( + RandomGenerator rng, double median, double scale, double inverseCumAccuracy) { + super(rng); + if (scale <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SCALE, scale); + } + this.scale = scale; + this.median = median; + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + return 0.5 + (FastMath.atan((x - median) / scale) / FastMath.PI); + } + + /** + * Access the median. + * + * @return the median for this distribution. + */ + public double getMedian() { + return median; + } + + /** + * Access the scale parameter. + * + * @return the scale parameter for this distribution. + */ + public double getScale() { + return scale; + } + + /** {@inheritDoc} */ + public double density(double x) { + final double dev = x - median; + return (1 / FastMath.PI) * (scale / (dev * dev + scale * scale)); + } + + /** + * {@inheritDoc} + * + * <p>Returns {@code Double.NEGATIVE_INFINITY} when {@code p == 0} and {@code + * Double.POSITIVE_INFINITY} when {@code p == 1}. + */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + double ret; + if (p < 0 || p > 1) { + throw new OutOfRangeException(p, 0, 1); + } else if (p == 0) { + ret = Double.NEGATIVE_INFINITY; + } else if (p == 1) { + ret = Double.POSITIVE_INFINITY; + } else { + ret = median + scale * FastMath.tan(FastMath.PI * (p - .5)); + } + return ret; + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>The mean is always undefined no matter the parameters. + * + * @return mean (always Double.NaN) + */ + public double getNumericalMean() { + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>The variance is always undefined no matter the parameters. 
+ * + * @return variance (always Double.NaN) + */ + public double getNumericalVariance() { + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always negative infinity no matter the parameters. + * + * @return lower bound of the support (always Double.NEGATIVE_INFINITY) + */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. + * + * @return upper bound of the support (always Double.POSITIVE_INFINITY) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/ChiSquaredDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ChiSquaredDistribution.java new file mode 100644 index 0000000..06af167 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/ChiSquaredDistribution.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; + +/** + * Implementation of the chi-squared distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Chi-squared_distribution">Chi-squared distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/Chi-SquaredDistribution.html">Chi-squared Distribution + * (MathWorld)</a> + */ +public class ChiSquaredDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier */ + private static final long serialVersionUID = -8352658048349159782L; + + /** Internal Gamma distribution. */ + private final GammaDistribution gamma; + + /** Inverse cumulative probability accuracy */ + private final double solverAbsoluteAccuracy; + + /** + * Create a Chi-Squared distribution with the given degrees of freedom. + * + * @param degreesOfFreedom Degrees of freedom. + */ + public ChiSquaredDistribution(double degreesOfFreedom) { + this(degreesOfFreedom, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a Chi-Squared distribution with the given degrees of freedom and inverse cumulative + * probability accuracy. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param degreesOfFreedom Degrees of freedom. + * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @since 2.1 + */ + public ChiSquaredDistribution(double degreesOfFreedom, double inverseCumAccuracy) { + this(new Well19937c(), degreesOfFreedom, inverseCumAccuracy); + } + + /** + * Create a Chi-Squared distribution with the given degrees of freedom. + * + * @param rng Random number generator. + * @param degreesOfFreedom Degrees of freedom. + * @since 3.3 + */ + public ChiSquaredDistribution(RandomGenerator rng, double degreesOfFreedom) { + this(rng, degreesOfFreedom, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a Chi-Squared distribution with the given degrees of freedom and inverse cumulative + * probability accuracy. + * + * @param rng Random number generator. + * @param degreesOfFreedom Degrees of freedom. + * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @since 3.1 + */ + public ChiSquaredDistribution( + RandomGenerator rng, double degreesOfFreedom, double inverseCumAccuracy) { + super(rng); + + gamma = new GammaDistribution(degreesOfFreedom / 2, 2); + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Access the number of degrees of freedom. + * + * @return the degrees of freedom. 
+ */ + public double getDegreesOfFreedom() { + return gamma.getShape() * 2.0; + } + + /** {@inheritDoc} */ + public double density(double x) { + return gamma.density(x); + } + + /** {@inheritDoc} * */ + @Override + public double logDensity(double x) { + return gamma.logDensity(x); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + return gamma.cumulativeProbability(x); + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For {@code k} degrees of freedom, the mean is {@code k}. + */ + public double getNumericalMean() { + return getDegreesOfFreedom(); + } + + /** + * {@inheritDoc} + * + * @return {@code 2 * k}, where {@code k} is the number of degrees of freedom. + */ + public double getNumericalVariance() { + return 2 * getDegreesOfFreedom(); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the degrees of freedom. + * + * @return zero. + */ + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the degrees of + * freedom. + * + * @return {@code Double.POSITIVE_INFINITY}. + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/ConstantRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ConstantRealDistribution.java new file mode 100644 index 0000000..93ba255 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/ConstantRealDistribution.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.OutOfRangeException; + +/** + * Implementation of the constant real distribution. + * + * @since 3.4 + */ +public class ConstantRealDistribution extends AbstractRealDistribution { + + /** Serialization ID */ + private static final long serialVersionUID = -4157745166772046273L; + + /** Constant value of the distribution */ + private final double value; + + /** + * Create a constant real distribution with the given value. 
+ * + * @param value the constant value of this distribution + */ + public ConstantRealDistribution(double value) { + super(null); // Avoid creating RandomGenerator + this.value = value; + } + + /** {@inheritDoc} */ + public double density(double x) { + return x == value ? 1 : 0; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + return x < value ? 0 : 1; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + return value; + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return value; + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + return 0; + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return value; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return value; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + return value; + } + + /** + * Override with no-op (there is no generator). + * + * @param seed (ignored) + */ + @Override + public void reseedRandomGenerator(long seed) {} +} diff --git a/src/main/java/org/apache/commons/math3/distribution/EnumeratedDistribution.java b/src/main/java/org/apache/commons/math3/distribution/EnumeratedDistribution.java new file mode 100644 index 0000000..991a9f8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/EnumeratedDistribution.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.MathArithmeticException; +import org.apache.commons.math3.exception.NotANumberException; +import org.apache.commons.math3.exception.NotFiniteNumberException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.math3.util.Pair; + +import java.io.Serializable; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A generic implementation of a <a + * href="http://en.wikipedia.org/wiki/Probability_distribution#Discrete_probability_distribution"> + * discrete probability distribution (Wikipedia)</a> over a finite sample space, based on an + * enumerated list of <value, probability> pairs. Input probabilities must all be + * non-negative, but zero values are allowed and their sum does not have to equal one. 
Constructors + * will normalize input probabilities to make them sum to one. + * + * <p>The list of <value, probability> pairs does not, strictly speaking, have to be a function and + * it can contain null values. The pmf created by the constructor will combine probabilities of + * equal values and will treat null values as equal. For example, if the list of pairs <"dog", + * 0.2>, <null, 0.1>, <"pig", 0.2>, <"dog", 0.1>, <null, 0.4> is provided + * to the constructor, the resulting pmf will assign mass of 0.5 to null, 0.3 to "dog" and 0.2 to + * null. + * + * @param <T> type of the elements in the sample space. + * @since 3.2 + */ +public class EnumeratedDistribution<T> implements Serializable { + + /** Serializable UID. */ + private static final long serialVersionUID = 20123308L; + + /** RNG instance used to generate samples from the distribution. */ + protected final RandomGenerator random; + + /** List of random variable values. */ + private final List<T> singletons; + + /** + * Probabilities of respective random variable values. For i = 0, ..., singletons.size() - 1, + * probability[i] is the probability that a random variable following this distribution takes + * the value singletons[i]. + */ + private final double[] probabilities; + + /** Cumulative probabilities, cached to speed up sampling. */ + private final double[] cumulativeProbabilities; + + /** + * Create an enumerated distribution using the given probability mass function enumeration. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param pmf probability mass function enumerated as a list of <T, probability> pairs. 
+ * @throws NotPositiveException if any of the probabilities are negative. + * @throws NotFiniteNumberException if any of the probabilities are infinite. + * @throws NotANumberException if any of the probabilities are NaN. + * @throws MathArithmeticException all of the probabilities are 0. + */ + public EnumeratedDistribution(final List<Pair<T, Double>> pmf) + throws NotPositiveException, + MathArithmeticException, + NotFiniteNumberException, + NotANumberException { + this(new Well19937c(), pmf); + } + + /** + * Create an enumerated distribution using the given random number generator and probability + * mass function enumeration. + * + * @param rng random number generator. + * @param pmf probability mass function enumerated as a list of <T, probability> pairs. + * @throws NotPositiveException if any of the probabilities are negative. + * @throws NotFiniteNumberException if any of the probabilities are infinite. + * @throws NotANumberException if any of the probabilities are NaN. + * @throws MathArithmeticException all of the probabilities are 0. 
+ */ + public EnumeratedDistribution(final RandomGenerator rng, final List<Pair<T, Double>> pmf) + throws NotPositiveException, + MathArithmeticException, + NotFiniteNumberException, + NotANumberException { + random = rng; + + singletons = new ArrayList<T>(pmf.size()); + final double[] probs = new double[pmf.size()]; + + for (int i = 0; i < pmf.size(); i++) { + final Pair<T, Double> sample = pmf.get(i); + singletons.add(sample.getKey()); + final double p = sample.getValue(); + if (p < 0) { + throw new NotPositiveException(sample.getValue()); + } + if (Double.isInfinite(p)) { + throw new NotFiniteNumberException(p); + } + if (Double.isNaN(p)) { + throw new NotANumberException(); + } + probs[i] = p; + } + + probabilities = MathArrays.normalizeArray(probs, 1.0); + + cumulativeProbabilities = new double[probabilities.length]; + double sum = 0; + for (int i = 0; i < probabilities.length; i++) { + sum += probabilities[i]; + cumulativeProbabilities[i] = sum; + } + } + + /** + * Reseed the random generator used to generate samples. + * + * @param seed the new seed + */ + public void reseedRandomGenerator(long seed) { + random.setSeed(seed); + } + + /** + * For a random variable {@code X} whose values are distributed according to this distribution, + * this method returns {@code P(X = x)}. In other words, this method represents the probability + * mass function (PMF) for the distribution. + * + * <p>Note that if {@code x1} and {@code x2} satisfy {@code x1.equals(x2)}, or both are null, + * then {@code probability(x1) = probability(x2)}. 
+ * + * @param x the point at which the PMF is evaluated + * @return the value of the probability mass function at {@code x} + */ + double probability(final T x) { + double probability = 0; + + for (int i = 0; i < probabilities.length; i++) { + if ((x == null && singletons.get(i) == null) + || (x != null && x.equals(singletons.get(i)))) { + probability += probabilities[i]; + } + } + + return probability; + } + + /** + * Return the probability mass function as a list of <value, probability> pairs. + * + * <p>Note that if duplicate and / or null values were provided to the constructor when creating + * this EnumeratedDistribution, the returned list will contain these values. If duplicates + * values exist, what is returned will not represent a pmf (i.e., it is up to the caller to + * consolidate duplicate mass points). + * + * @return the probability mass function. + */ + public List<Pair<T, Double>> getPmf() { + final List<Pair<T, Double>> samples = new ArrayList<Pair<T, Double>>(probabilities.length); + + for (int i = 0; i < probabilities.length; i++) { + samples.add(new Pair<T, Double>(singletons.get(i), probabilities[i])); + } + + return samples; + } + + /** + * Generate a random value sampled from this distribution. + * + * @return a random value. + */ + public T sample() { + final double randomValue = random.nextDouble(); + + int index = Arrays.binarySearch(cumulativeProbabilities, randomValue); + if (index < 0) { + index = -index - 1; + } + + if (index >= 0 + && index < probabilities.length + && randomValue < cumulativeProbabilities[index]) { + return singletons.get(index); + } + + /* This should never happen, but it ensures we will return a correct + * object in case there is some floating point inequality problem + * wrt the cumulative probabilities. */ + return singletons.get(singletons.size() - 1); + } + + /** + * Generate a random sample from the distribution. + * + * @param sampleSize the number of random values to generate. 
+ * @return an array representing the random sample. + * @throws NotStrictlyPositiveException if {@code sampleSize} is not positive. + */ + public Object[] sample(int sampleSize) throws NotStrictlyPositiveException { + if (sampleSize <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize); + } + + final Object[] out = new Object[sampleSize]; + + for (int i = 0; i < sampleSize; i++) { + out[i] = sample(); + } + + return out; + } + + /** + * Generate a random sample from the distribution. + * + * <p>If the requested samples fit in the specified array, it is returned therein. Otherwise, a + * new array is allocated with the runtime type of the specified array and the size of this + * collection. + * + * @param sampleSize the number of random values to generate. + * @param array the array to populate. + * @return an array representing the random sample. + * @throws NotStrictlyPositiveException if {@code sampleSize} is not positive. + * @throws NullArgumentException if {@code array} is null + */ + public T[] sample(int sampleSize, final T[] array) throws NotStrictlyPositiveException { + if (sampleSize <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize); + } + + if (array == null) { + throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); + } + + T[] out; + if (array.length < sampleSize) { + @SuppressWarnings("unchecked") // safe as both are of type T + final T[] unchecked = + (T[]) Array.newInstance(array.getClass().getComponentType(), sampleSize); + out = unchecked; + } else { + out = array; + } + + for (int i = 0; i < sampleSize; i++) { + out[i] = sample(); + } + + return out; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/EnumeratedIntegerDistribution.java b/src/main/java/org/apache/commons/math3/distribution/EnumeratedIntegerDistribution.java new file mode 100644 index 0000000..37daf57 --- /dev/null +++ 
b/src/main/java/org/apache/commons/math3/distribution/EnumeratedIntegerDistribution.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.MathArithmeticException; +import org.apache.commons.math3.exception.NotANumberException; +import org.apache.commons.math3.exception.NotFiniteNumberException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.Pair; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +
/**
 * Discrete distribution over a finite set of {@code int} values, backed by an {@link
 * EnumeratedDistribution} keyed on boxed {@link Integer}s.
 *
 * <p>Entries with zero probability may be supplied; they never widen the reported support.<br>
 * The same value may be listed more than once: the masses of repeated values are added together
 * whenever cumulative probabilities or moments are computed.
 *
 * @since 3.2
 */
public class EnumeratedIntegerDistribution extends AbstractIntegerDistribution {

    /** Serializable UID. */
    private static final long serialVersionUID = 20130308L;

    /** Generic enumerated distribution that stores the pmf and performs the sampling. */
    protected final EnumeratedDistribution<Integer> innerDistribution;

    /**
     * Build the distribution from parallel arrays of values and probabilities, sampling with a
     * freshly created {@link Well19937c}.
     *
     * <p><b>Note:</b> when no sampling is required, the generator creation can be avoided by using
     * the constructor that takes a {@link RandomGenerator} and passing {@code null}.
     *
     * @param singletons array of random variable values.
     * @param probabilities array of probabilities.
     * @throws DimensionMismatchException if {@code singletons.length != probabilities.length}
     * @throws NotPositiveException if any of the probabilities are negative.
     * @throws NotFiniteNumberException if any of the probabilities are infinite.
     * @throws NotANumberException if any of the probabilities are NaN.
     * @throws MathArithmeticException all of the probabilities are 0.
     */
    public EnumeratedIntegerDistribution(final int[] singletons, final double[] probabilities)
            throws DimensionMismatchException,
                    NotPositiveException,
                    MathArithmeticException,
                    NotFiniteNumberException,
                    NotANumberException {
        this(new Well19937c(), singletons, probabilities);
    }

    /**
     * Build the distribution from parallel arrays of values and probabilities, sampling with the
     * supplied generator.
     *
     * @param rng random number generator.
     * @param singletons array of random variable values.
     * @param probabilities array of probabilities.
     * @throws DimensionMismatchException if {@code singletons.length != probabilities.length}
     * @throws NotPositiveException if any of the probabilities are negative.
     * @throws NotFiniteNumberException if any of the probabilities are infinite.
     * @throws NotANumberException if any of the probabilities are NaN.
     * @throws MathArithmeticException all of the probabilities are 0.
     */
    public EnumeratedIntegerDistribution(
            final RandomGenerator rng, final int[] singletons, final double[] probabilities)
            throws DimensionMismatchException,
                    NotPositiveException,
                    MathArithmeticException,
                    NotFiniteNumberException,
                    NotANumberException {
        super(rng);
        final List<Pair<Integer, Double>> pmf = createDistribution(singletons, probabilities);
        innerDistribution = new EnumeratedDistribution<Integer>(rng, pmf);
    }

    /**
     * Build the empirical distribution of a dataset: each distinct value receives a mass equal to
     * its relative frequency in {@code data}.
     *
     * @param rng random number generator used for sampling
     * @param data input dataset
     * @since 3.6
     */
    public EnumeratedIntegerDistribution(final RandomGenerator rng, final int[] data) {
        super(rng);

        // Tally how many times each distinct value occurs.
        final Map<Integer, Integer> tally = new HashMap<Integer, Integer>();
        for (int i = 0; i < data.length; i++) {
            final Integer seen = tally.get(data[i]);
            tally.put(data[i], seen == null ? 1 : seen + 1);
        }

        // Turn the tally into parallel value / relative-frequency arrays.
        final double total = data.length;
        final int[] distinct = new int[tally.size()];
        final double[] frequencies = new double[tally.size()];
        int slot = 0;
        for (Entry<Integer, Integer> occurrence : tally.entrySet()) {
            distinct[slot] = occurrence.getKey();
            frequencies[slot] = occurrence.getValue() / total;
            slot++;
        }

        innerDistribution =
                new EnumeratedDistribution<Integer>(
                        rng, createDistribution(distinct, frequencies));
    }

    /**
     * Build the empirical distribution of a dataset using a freshly created {@link Well19937c}.
     * For example, [0,1,1,2] as input creates a distribution with values 0, 1 and 2 having
     * probability masses 0.25, 0.5 and 0.25 respectively.
     *
     * @param data input dataset
     * @since 3.6
     */
    public EnumeratedIntegerDistribution(final int[] data) {
        this(new Well19937c(), data);
    }

    /**
     * Zip the value and probability arrays into the list-of-pairs form expected by {@link
     * EnumeratedDistribution}.
     *
     * @param singletons values
     * @param probabilities probabilities
     * @return list of value/probability pairs
     */
    private static List<Pair<Integer, Double>> createDistribution(
            int[] singletons, double[] probabilities) {
        final int count = singletons.length;
        if (count != probabilities.length) {
            throw new DimensionMismatchException(probabilities.length, count);
        }

        final List<Pair<Integer, Double>> pairs = new ArrayList<Pair<Integer, Double>>(count);
        int position = 0;
        while (position < count) {
            pairs.add(new Pair<Integer, Double>(singletons[position], probabilities[position]));
            position++;
        }
        return pairs;
    }

    /** {@inheritDoc} */
    public double probability(final int x) {
        return innerDistribution.probability(x);
    }

    /** {@inheritDoc} */
    public double cumulativeProbability(final int x) {
        final List<Pair<Integer, Double>> pmf = innerDistribution.getPmf();
        double accumulated = 0;

        for (int i = 0; i < pmf.size(); i++) {
            final Pair<Integer, Double> point = pmf.get(i);
            if (point.getKey() <= x) {
                accumulated += point.getValue();
            }
        }

        return accumulated;
    }

    /**
     * {@inheritDoc}
     *
     * @return {@code sum(singletons[i] * probabilities[i])}
     */
    public double getNumericalMean() {
        final List<Pair<Integer, Double>> pmf = innerDistribution.getPmf();
        double firstMoment = 0;

        for (int i = 0; i < pmf.size(); i++) {
            final Pair<Integer, Double> point = pmf.get(i);
            firstMoment += point.getValue() * point.getKey();
        }

        return firstMoment;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Evaluated as {@code E[X^2] - E[X]^2}.
     *
     * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
     */
    public double getNumericalVariance() {
        final List<Pair<Integer, Double>> pmf = innerDistribution.getPmf();
        double firstMoment = 0;
        double secondMoment = 0;

        for (int i = 0; i < pmf.size(); i++) {
            final double mass = pmf.get(i).getValue();
            final int value = pmf.get(i).getKey();
            final double weighted = mass * value;
            firstMoment += weighted;
            secondMoment += weighted * value;
        }

        return secondMoment - firstMoment * firstMoment;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the lowest value with non-zero probability.
     *
     * @return the lowest value with non-zero probability.
     */
    public int getSupportLowerBound() {
        int lowest = Integer.MAX_VALUE;
        for (final Pair<Integer, Double> point : innerDistribution.getPmf()) {
            final int value = point.getKey();
            if (value >= lowest) {
                continue;
            }
            if (point.getValue() > 0) {
                lowest = value;
            }
        }

        return lowest;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Returns the highest value with non-zero probability.
     *
     * @return the highest value with non-zero probability.
     */
    public int getSupportUpperBound() {
        int highest = Integer.MIN_VALUE;
        for (final Pair<Integer, Double> point : innerDistribution.getPmf()) {
            final int value = point.getKey();
            if (value <= highest) {
                continue;
            }
            if (point.getValue() > 0) {
                highest = value;
            }
        }

        return highest;
    }

    /**
     * {@inheritDoc}
     *
     * <p>The support of this distribution is connected.
     *
     * @return {@code true}
     */
    public boolean isSupportConnected() {
        return true;
    }

    /** {@inheritDoc} */
    @Override
    public int sample() {
        return innerDistribution.sample();
    }
}
diff --git a/src/main/java/org/apache/commons/math3/distribution/EnumeratedRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/EnumeratedRealDistribution.java new file mode 100644 index 0000000..2dd35ec --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/EnumeratedRealDistribution.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.MathArithmeticException; +import org.apache.commons.math3.exception.NotANumberException; +import org.apache.commons.math3.exception.NotFiniteNumberException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.Pair; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Implementation of a real-valued {@link EnumeratedDistribution}. + * + * <p>Values with zero-probability are allowed but they do not extend the support.<br> + * Duplicate values are allowed. Probabilities of duplicate values are combined when computing + * cumulative probabilities and statistics. + * + * @since 3.2 + */ +public class EnumeratedRealDistribution extends AbstractRealDistribution { + + /** Serializable UID. */ + private static final long serialVersionUID = 20130308L; + + /** + * {@link EnumeratedDistribution} (using the {@link Double} wrapper) used to generate the pmf. + */ + protected final EnumeratedDistribution<Double> innerDistribution; + + /** + * Create a discrete real-valued distribution using the given probability mass function + * enumeration. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param singletons array of random variable values. + * @param probabilities array of probabilities. + * @throws DimensionMismatchException if {@code singletons.length != probabilities.length} + * @throws NotPositiveException if any of the probabilities are negative. + * @throws NotFiniteNumberException if any of the probabilities are infinite. + * @throws NotANumberException if any of the probabilities are NaN. + * @throws MathArithmeticException all of the probabilities are 0. + */ + public EnumeratedRealDistribution(final double[] singletons, final double[] probabilities) + throws DimensionMismatchException, + NotPositiveException, + MathArithmeticException, + NotFiniteNumberException, + NotANumberException { + this(new Well19937c(), singletons, probabilities); + } + + /** + * Create a discrete real-valued distribution using the given random number generator and + * probability mass function enumeration. + * + * @param rng random number generator. + * @param singletons array of random variable values. + * @param probabilities array of probabilities. + * @throws DimensionMismatchException if {@code singletons.length != probabilities.length} + * @throws NotPositiveException if any of the probabilities are negative. + * @throws NotFiniteNumberException if any of the probabilities are infinite. + * @throws NotANumberException if any of the probabilities are NaN. + * @throws MathArithmeticException all of the probabilities are 0. 
+ */ + public EnumeratedRealDistribution( + final RandomGenerator rng, final double[] singletons, final double[] probabilities) + throws DimensionMismatchException, + NotPositiveException, + MathArithmeticException, + NotFiniteNumberException, + NotANumberException { + super(rng); + + innerDistribution = + new EnumeratedDistribution<Double>( + rng, createDistribution(singletons, probabilities)); + } + + /** + * Create a discrete real-valued distribution from the input data. Values are assigned mass + * based on their frequency. + * + * @param rng random number generator used for sampling + * @param data input dataset + * @since 3.6 + */ + public EnumeratedRealDistribution(final RandomGenerator rng, final double[] data) { + super(rng); + final Map<Double, Integer> dataMap = new HashMap<Double, Integer>(); + for (double value : data) { + Integer count = dataMap.get(value); + if (count == null) { + count = 0; + } + dataMap.put(value, ++count); + } + final int massPoints = dataMap.size(); + final double denom = data.length; + final double[] values = new double[massPoints]; + final double[] probabilities = new double[massPoints]; + int index = 0; + for (Entry<Double, Integer> entry : dataMap.entrySet()) { + values[index] = entry.getKey(); + probabilities[index] = entry.getValue().intValue() / denom; + index++; + } + innerDistribution = + new EnumeratedDistribution<Double>(rng, createDistribution(values, probabilities)); + } + + /** + * Create a discrete real-valued distribution from the input data. Values are assigned mass + * based on their frequency. For example, [0,1,1,2] as input creates a distribution with values + * 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively, + * + * @param data input dataset + * @since 3.6 + */ + public EnumeratedRealDistribution(final double[] data) { + this(new Well19937c(), data); + } + + /** + * Create the list of Pairs representing the distribution from singletons and probabilities. 
+ * + * @param singletons values + * @param probabilities probabilities + * @return list of value/probability pairs + */ + private static List<Pair<Double, Double>> createDistribution( + double[] singletons, double[] probabilities) { + if (singletons.length != probabilities.length) { + throw new DimensionMismatchException(probabilities.length, singletons.length); + } + + final List<Pair<Double, Double>> samples = + new ArrayList<Pair<Double, Double>>(singletons.length); + + for (int i = 0; i < singletons.length; i++) { + samples.add(new Pair<Double, Double>(singletons[i], probabilities[i])); + } + return samples; + } + + /** {@inheritDoc} */ + @Override + public double probability(final double x) { + return innerDistribution.probability(x); + } + + /** + * For a random variable {@code X} whose values are distributed according to this distribution, + * this method returns {@code P(X = x)}. In other words, this method represents the probability + * mass function (PMF) for the distribution. 
+ * + * @param x the point at which the PMF is evaluated + * @return the value of the probability mass function at point {@code x} + */ + public double density(final double x) { + return probability(x); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(final double x) { + double probability = 0; + + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + if (sample.getKey() <= x) { + probability += sample.getValue(); + } + } + + return probability; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + + double probability = 0; + double x = getSupportLowerBound(); + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + if (sample.getValue() == 0.0) { + continue; + } + + probability += sample.getValue(); + x = sample.getKey(); + + if (probability >= p) { + break; + } + } + + return x; + } + + /** + * {@inheritDoc} + * + * @return {@code sum(singletons[i] * probabilities[i])} + */ + public double getNumericalMean() { + double mean = 0; + + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + mean += sample.getValue() * sample.getKey(); + } + + return mean; + } + + /** + * {@inheritDoc} + * + * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])} + */ + public double getNumericalVariance() { + double mean = 0; + double meanOfSquares = 0; + + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + mean += sample.getValue() * sample.getKey(); + meanOfSquares += sample.getValue() * sample.getKey() * sample.getKey(); + } + + return meanOfSquares - mean * mean; + } + + /** + * {@inheritDoc} + * + * <p>Returns the lowest value with non-zero probability. + * + * @return the lowest value with non-zero probability. 
+ */ + public double getSupportLowerBound() { + double min = Double.POSITIVE_INFINITY; + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + if (sample.getKey() < min && sample.getValue() > 0) { + min = sample.getKey(); + } + } + + return min; + } + + /** + * {@inheritDoc} + * + * <p>Returns the highest value with non-zero probability. + * + * @return the highest value with non-zero probability. + */ + public double getSupportUpperBound() { + double max = Double.NEGATIVE_INFINITY; + for (final Pair<Double, Double> sample : innerDistribution.getPmf()) { + if (sample.getKey() > max && sample.getValue() > 0) { + max = sample.getKey(); + } + } + + return max; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution includes the lower bound. + * + * @return {@code true} + */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution includes the upper bound. + * + * @return {@code true} + */ + public boolean isSupportUpperBoundInclusive() { + return true; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + return innerDistribution.sample(); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/ExponentialDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ExponentialDistribution.java new file mode 100644 index 0000000..6ca5f1c --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/ExponentialDistribution.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.CombinatoricsUtils; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.ResizableDoubleArray; + +/** + * Implementation of the exponential distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Exponential_distribution">Exponential distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/ExponentialDistribution.html">Exponential distribution + * (MathWorld)</a> + */ +public class ExponentialDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier */ + private static final long serialVersionUID = 2401296428283614780L; + + /** + * Used when generating Exponential samples. Table containing the constants q_i = sum_{j=1}^i + * (ln 2)^j/j! = ln 2 + (ln 2)^2/2 + ... + (ln 2)^i/i! until the largest representable fraction + * below 1 is exceeded. 
+ * + * <p>Note that 1 = 2 - 1 = exp(ln 2) - 1 = sum_{n=1}^infty (ln 2)^n / n! thus q_i -> 1 as i -> + * +inf, so the higher i, the closer to one we get (the series is not alternating). + * + * <p>By trying, n = 16 in Java is enough to reach 1.0. + */ + private static final double[] EXPONENTIAL_SA_QI; + + /** The mean of this distribution. */ + private final double mean; + + /** The logarithm of the mean, stored to reduce computing time. * */ + private final double logMean; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** Initialize tables. */ + static { + /** Filling EXPONENTIAL_SA_QI table. Note that we don't want qi = 0 in the table. */ + final double LN2 = FastMath.log(2); + double qi = 0; + int i = 1; + + /** + * ArithmeticUtils provides factorials up to 20, so let's use that limit together with + * Precision.EPSILON to generate the following code (a priori, we know that there will be 16 + * elements, but it is better to not hardcode it). + */ + final ResizableDoubleArray ra = new ResizableDoubleArray(20); + + while (qi < 1) { + qi += FastMath.pow(LN2, i) / CombinatoricsUtils.factorial(i); + ra.addElement(qi); + ++i; + } + + EXPONENTIAL_SA_QI = ra.getElements(); + } + + /** + * Create an exponential distribution with the given mean. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mean mean of this distribution. + */ + public ExponentialDistribution(double mean) { + this(mean, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create an exponential distribution with the given mean. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mean Mean of this distribution. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code mean <= 0}. + * @since 2.1 + */ + public ExponentialDistribution(double mean, double inverseCumAccuracy) { + this(new Well19937c(), mean, inverseCumAccuracy); + } + + /** + * Creates an exponential distribution. + * + * @param rng Random number generator. + * @param mean Mean of this distribution. + * @throws NotStrictlyPositiveException if {@code mean <= 0}. + * @since 3.3 + */ + public ExponentialDistribution(RandomGenerator rng, double mean) + throws NotStrictlyPositiveException { + this(rng, mean, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates an exponential distribution. + * + * @param rng Random number generator. + * @param mean Mean of this distribution. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code mean <= 0}. + * @since 3.1 + */ + public ExponentialDistribution(RandomGenerator rng, double mean, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (mean <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.MEAN, mean); + } + this.mean = mean; + logMean = FastMath.log(mean); + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Access the mean. + * + * @return the mean. 
+ */ + public double getMean() { + return mean; + } + + /** {@inheritDoc} */ + public double density(double x) { + final double logDensity = logDensity(x); + return logDensity == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logDensity); + } + + /** {@inheritDoc} * */ + @Override + public double logDensity(double x) { + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } + return -x / mean - logMean; + } + + /** + * {@inheritDoc} + * + * <p>The implementation of this method is based on: + * + * <ul> + * <li><a href="http://mathworld.wolfram.com/ExponentialDistribution.html">Exponential + * Distribution</a>, equation (1). + * </ul> + */ + public double cumulativeProbability(double x) { + double ret; + if (x <= 0.0) { + ret = 0.0; + } else { + ret = 1.0 - FastMath.exp(-x / mean); + } + return ret; + } + + /** + * {@inheritDoc} + * + * <p>Returns {@code 0} when {@code p= = 0} and {@code Double.POSITIVE_INFINITY} when {@code p + * == 1}. + */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + double ret; + + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0.0, 1.0); + } else if (p == 1.0) { + ret = Double.POSITIVE_INFINITY; + } else { + ret = -mean * FastMath.log(1.0 - p); + } + + return ret; + } + + /** + * {@inheritDoc} + * + * <p><strong>Algorithm Description</strong>: this implementation uses the <a + * href="http://www.jesus.ox.ac.uk/~clifford/a5/chap1/node5.html">Inversion Method</a> to + * generate exponentially distributed random values from uniform deviates. + * + * @return a random value. 
+ * @since 2.2 + */ + @Override + public double sample() { + // Step 1: + double a = 0; + double u = random.nextDouble(); + + // Step 2 and 3: + while (u < 0.5) { + a += EXPONENTIAL_SA_QI[0]; + u *= 2; + } + + // Step 4 (now u >= 0.5): + u += u - 1; + + // Step 5: + if (u <= EXPONENTIAL_SA_QI[0]) { + return mean * (a + u); + } + + // Step 6: + int i = 0; // Should be 1, be we iterate before it in while using 0 + double u2 = random.nextDouble(); + double umin = u2; + + // Step 7 and 8: + do { + ++i; + u2 = random.nextDouble(); + + if (u2 < umin) { + umin = u2; + } + + // Step 8: + } while (u > EXPONENTIAL_SA_QI[i]); // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1 + + return mean * (a + umin * EXPONENTIAL_SA_QI[0]); + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For mean parameter {@code k}, the mean is {@code k}. + */ + public double getNumericalMean() { + return getMean(); + } + + /** + * {@inheritDoc} + * + * <p>For mean parameter {@code k}, the variance is {@code k^2}. + */ + public double getNumericalVariance() { + final double m = getMean(); + return m * m; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the mean parameter. + * + * @return lower bound of the support (always 0) + */ + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the mean parameter. + * + * @return upper bound of the support (always Double.POSITIVE_INFINITY) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/FDistribution.java b/src/main/java/org/apache/commons/math3/distribution/FDistribution.java new file mode 100644 index 0000000..3269f8d --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/FDistribution.java @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Beta; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the F-distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/F-distribution">F-distribution (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/F-Distribution.html">F-distribution (MathWorld)</a> + */ +public class FDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. 
+     *
+     * @since 2.1
+     */
+    public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;
+
+    /** Serializable version identifier. */
+    private static final long serialVersionUID = -8516354193418641566L;
+
+    /** The numerator degrees of freedom. */
+    private final double numeratorDegreesOfFreedom;
+
+    /** The denominator degrees of freedom. */
+    private final double denominatorDegreesOfFreedom;
+
+    /** Inverse cumulative probability accuracy. */
+    private final double solverAbsoluteAccuracy;
+
+    /**
+     * Cached numerical variance. Starts as {@code Double.NaN}; whether it has actually been
+     * computed is tracked by {@link #numericalVarianceIsCalculated}.
+     */
+    private double numericalVariance = Double.NaN;
+
+    /** Whether or not the numerical variance has been calculated. */
+    private boolean numericalVarianceIsCalculated = false;
+
+    /**
+     * Creates an F distribution using the given degrees of freedom and {@link
+     * #DEFAULT_INVERSE_ABSOLUTE_ACCURACY} as inverse cumulative probability accuracy.
+     *
+     * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as
+     * random generator to be used for sampling only (see {@link #sample()} and {@link
+     * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to
+     * pass {@code null} as random generator via the appropriate constructors to avoid the
+     * additional initialisation overhead.
+     *
+     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
+     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
+     * @throws NotStrictlyPositiveException if {@code numeratorDegreesOfFreedom <= 0} or {@code
+     *     denominatorDegreesOfFreedom <= 0}.
+     */
+    public FDistribution(double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom)
+            throws NotStrictlyPositiveException {
+        this(
+                numeratorDegreesOfFreedom,
+                denominatorDegreesOfFreedom,
+                DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
+    }
+
+    /**
+     * Creates an F distribution using the given degrees of freedom and inverse cumulative
+     * probability accuracy.
+     *
+     * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as
+     * random generator to be used for sampling only (see {@link #sample()} and {@link
+     * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to
+     * pass {@code null} as random generator via the appropriate constructors to avoid the
+     * additional initialisation overhead.
+     *
+     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
+     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
+     * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability
+     *     estimates.
+     * @throws NotStrictlyPositiveException if {@code numeratorDegreesOfFreedom <= 0} or {@code
+     *     denominatorDegreesOfFreedom <= 0}.
+     * @since 2.1
+     */
+    public FDistribution(
+            double numeratorDegreesOfFreedom,
+            double denominatorDegreesOfFreedom,
+            double inverseCumAccuracy)
+            throws NotStrictlyPositiveException {
+        // A fresh Well19937c is created here; it is only needed by the sampling methods.
+        this(
+                new Well19937c(),
+                numeratorDegreesOfFreedom,
+                denominatorDegreesOfFreedom,
+                inverseCumAccuracy);
+    }
+
+    /**
+     * Creates an F distribution that uses the supplied random generator and {@link
+     * #DEFAULT_INVERSE_ABSOLUTE_ACCURACY} as inverse cumulative probability accuracy.
+     *
+     * @param rng Random number generator.
+     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
+     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
+     * @throws NotStrictlyPositiveException if {@code numeratorDegreesOfFreedom <= 0} or {@code
+     *     denominatorDegreesOfFreedom <= 0}.
+     * @since 3.3
+     */
+    public FDistribution(
+            RandomGenerator rng,
+            double numeratorDegreesOfFreedom,
+            double denominatorDegreesOfFreedom)
+            throws NotStrictlyPositiveException {
+        this(
+                rng,
+                numeratorDegreesOfFreedom,
+                denominatorDegreesOfFreedom,
+                DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
+    }
+
+    /**
+     * Creates an F distribution.
+     *
+     * @param rng Random number generator.
+     * @param numeratorDegreesOfFreedom Numerator degrees of freedom.
+     * @param denominatorDegreesOfFreedom Denominator degrees of freedom.
+ * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability + * estimates. + * @throws NotStrictlyPositiveException if {@code numeratorDegreesOfFreedom <= 0} or {@code + * denominatorDegreesOfFreedom <= 0}. + * @since 3.1 + */ + public FDistribution( + RandomGenerator rng, + double numeratorDegreesOfFreedom, + double denominatorDegreesOfFreedom, + double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (numeratorDegreesOfFreedom <= 0) { + throw new NotStrictlyPositiveException( + LocalizedFormats.DEGREES_OF_FREEDOM, numeratorDegreesOfFreedom); + } + if (denominatorDegreesOfFreedom <= 0) { + throw new NotStrictlyPositiveException( + LocalizedFormats.DEGREES_OF_FREEDOM, denominatorDegreesOfFreedom); + } + this.numeratorDegreesOfFreedom = numeratorDegreesOfFreedom; + this.denominatorDegreesOfFreedom = denominatorDegreesOfFreedom; + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * {@inheritDoc} + * + * @since 2.1 + */ + public double density(double x) { + return FastMath.exp(logDensity(x)); + } + + /** {@inheritDoc} * */ + @Override + public double logDensity(double x) { + final double nhalf = numeratorDegreesOfFreedom / 2; + final double mhalf = denominatorDegreesOfFreedom / 2; + final double logx = FastMath.log(x); + final double logn = FastMath.log(numeratorDegreesOfFreedom); + final double logm = FastMath.log(denominatorDegreesOfFreedom); + final double lognxm = + FastMath.log(numeratorDegreesOfFreedom * x + denominatorDegreesOfFreedom); + return nhalf * logn + + nhalf * logx + - logx + + mhalf * logm + - nhalf * lognxm + - mhalf * lognxm + - Beta.logBeta(nhalf, mhalf); + } + + /** + * {@inheritDoc} + * + * <p>The implementation of this method is based on + * + * <ul> + * <li><a href="http://mathworld.wolfram.com/F-Distribution.html">F-Distribution</a>, equation + * (4). 
+ * </ul> + */ + public double cumulativeProbability(double x) { + double ret; + if (x <= 0) { + ret = 0; + } else { + double n = numeratorDegreesOfFreedom; + double m = denominatorDegreesOfFreedom; + + ret = Beta.regularizedBeta((n * x) / (m + n * x), 0.5 * n, 0.5 * m); + } + return ret; + } + + /** + * Access the numerator degrees of freedom. + * + * @return the numerator degrees of freedom. + */ + public double getNumeratorDegreesOfFreedom() { + return numeratorDegreesOfFreedom; + } + + /** + * Access the denominator degrees of freedom. + * + * @return the denominator degrees of freedom. + */ + public double getDenominatorDegreesOfFreedom() { + return denominatorDegreesOfFreedom; + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For denominator degrees of freedom parameter {@code b}, the mean is + * + * <ul> + * <li>if {@code b > 2} then {@code b / (b - 2)}, + * <li>else undefined ({@code Double.NaN}). + * </ul> + */ + public double getNumericalMean() { + final double denominatorDF = getDenominatorDegreesOfFreedom(); + + if (denominatorDF > 2) { + return denominatorDF / (denominatorDF - 2); + } + + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>For numerator degrees of freedom parameter {@code a} and denominator degrees of freedom + * parameter {@code b}, the variance is + * + * <ul> + * <li>if {@code b > 4} then {@code [2 * b^2 * (a + b - 2)] / [a * (b - 2)^2 * (b - 4)]}, + * <li>else undefined ({@code Double.NaN}). 
+ * </ul> + */ + public double getNumericalVariance() { + if (!numericalVarianceIsCalculated) { + numericalVariance = calculateNumericalVariance(); + numericalVarianceIsCalculated = true; + } + return numericalVariance; + } + + /** + * used by {@link #getNumericalVariance()} + * + * @return the variance of this distribution + */ + protected double calculateNumericalVariance() { + final double denominatorDF = getDenominatorDegreesOfFreedom(); + + if (denominatorDF > 4) { + final double numeratorDF = getNumeratorDegreesOfFreedom(); + final double denomDFMinusTwo = denominatorDF - 2; + + return (2 * (denominatorDF * denominatorDF) * (numeratorDF + denominatorDF - 2)) + / ((numeratorDF * (denomDFMinusTwo * denomDFMinusTwo) * (denominatorDF - 4))); + } + + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the parameters. + * + * @return lower bound of the support (always 0) + */ + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. + * + * @return upper bound of the support (always Double.POSITIVE_INFINITY) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/GammaDistribution.java b/src/main/java/org/apache/commons/math3/distribution/GammaDistribution.java new file mode 100644 index 0000000..f062fd2 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/GammaDistribution.java @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Gamma distribution. 
+ *
+ * @see <a href="http://en.wikipedia.org/wiki/Gamma_distribution">Gamma distribution (Wikipedia)</a>
+ * @see <a href="http://mathworld.wolfram.com/GammaDistribution.html">Gamma distribution
+ *     (MathWorld)</a>
+ */
+public class GammaDistribution extends AbstractRealDistribution {
+    /**
+     * Default inverse cumulative probability accuracy.
+     *
+     * @since 2.1
+     */
+    public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;
+
+    /** Serializable version identifier. */
+    private static final long serialVersionUID = 20120524L;
+
+    /** The shape parameter. */
+    private final double shape;
+
+    /** The scale parameter. */
+    private final double scale;
+
+    /**
+     * The constant value of {@code shape + g + 0.5}, where {@code g} is the Lanczos constant {@link
+     * Gamma#LANCZOS_G}.
+     */
+    private final double shiftedShape;
+
+    /**
+     * The constant value of {@code shape / scale * sqrt(e / (2 * pi * (shape + g + 0.5))) /
+     * L(shape) * (shape + g + 0.5)^(-shape) * exp(shape + g)}, where {@code L(shape)} is the
+     * Lanczos approximation returned by {@link Gamma#lanczos(double)}. This prefactor is used in
+     * {@link #density(double)}, when no overflow occurs with the natural calculation.
+     */
+    private final double densityPrefactor1;
+
+    /**
+     * The constant value of {@code log(shape / scale * sqrt(e / (2 * pi * (shape + g + 0.5))) /
+     * L(shape) * (shape + g + 0.5)^(-shape) * exp(shape + g))}, where {@code L(shape)} is the
+     * Lanczos approximation returned by {@link Gamma#lanczos(double)}. This prefactor is used in
+     * {@link #logDensity(double)}, when no overflow occurs with the natural calculation.
+     */
+    private final double logDensityPrefactor1;
+
+    /**
+     * The constant value of {@code shape * sqrt(e / (2 * pi * (shape + g + 0.5))) / L(shape)},
+     * where {@code L(shape)} is the Lanczos approximation returned by {@link
+     * Gamma#lanczos(double)}. This prefactor is used in {@link #density(double)}, when overflow
+     * occurs with the natural calculation.
+ */ + private final double densityPrefactor2; + + /** + * The constant value of {@code log(shape * sqrt(e / (2 * pi * (shape + g + 0.5))) / L(shape))}, + * where {@code L(shape)} is the Lanczos approximation returned by {@link + * Gamma#lanczos(double)}. This prefactor is used in {@link #logDensity(double)}, when overflow + * occurs with the natural calculation. + */ + private final double logDensityPrefactor2; + + /** + * Lower bound on {@code y = x / scale} for the selection of the computation method in {@link + * #density(double)}. For {@code y <= minY}, the natural calculation overflows. + */ + private final double minY; + + /** + * Upper bound on {@code log(y)} ({@code y = x / scale}) for the selection of the computation + * method in {@link #density(double)}. For {@code log(y) >= maxLogY}, the natural calculation + * overflows. + */ + private final double maxLogY; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** + * Creates a new gamma distribution with specified values of the shape and scale parameters. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param shape the shape parameter + * @param scale the scale parameter + * @throws NotStrictlyPositiveException if {@code shape <= 0} or {@code scale <= 0}. + */ + public GammaDistribution(double shape, double scale) throws NotStrictlyPositiveException { + this(shape, scale, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a new gamma distribution with specified values of the shape and scale parameters. 
+     *
+     * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as
+     * random generator to be used for sampling only (see {@link #sample()} and {@link
+     * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to
+     * pass {@code null} as random generator via the appropriate constructors to avoid the
+     * additional initialisation overhead.
+     *
+     * @param shape the shape parameter
+     * @param scale the scale parameter
+     * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability
+     *     estimates (the constructors without this parameter use {@link
+     *     #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
+     * @throws NotStrictlyPositiveException if {@code shape <= 0} or {@code scale <= 0}.
+     * @since 2.1
+     */
+    public GammaDistribution(double shape, double scale, double inverseCumAccuracy)
+            throws NotStrictlyPositiveException {
+        // A fresh Well19937c is created here; it is only needed by the sampling methods.
+        this(new Well19937c(), shape, scale, inverseCumAccuracy);
+    }
+
+    /**
+     * Creates a Gamma distribution that uses the supplied random generator and {@link
+     * #DEFAULT_INVERSE_ABSOLUTE_ACCURACY} as inverse cumulative probability accuracy.
+     *
+     * @param rng Random number generator.
+     * @param shape the shape parameter
+     * @param scale the scale parameter
+     * @throws NotStrictlyPositiveException if {@code shape <= 0} or {@code scale <= 0}.
+     * @since 3.3
+     */
+    public GammaDistribution(RandomGenerator rng, double shape, double scale)
+            throws NotStrictlyPositiveException {
+        this(rng, shape, scale, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
+    }
+
+    /**
+     * Creates a Gamma distribution.
+     *
+     * @param rng Random number generator.
+     * @param shape the shape parameter
+     * @param scale the scale parameter
+     * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability
+     *     estimates (the constructors without this parameter use {@link
+     *     #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
+     * @throws NotStrictlyPositiveException if {@code shape <= 0} or {@code scale <= 0}.
+     * @since 3.1
+     */
+    public GammaDistribution(
+            RandomGenerator rng, double shape, double scale, double inverseCumAccuracy)
+            throws NotStrictlyPositiveException {
+        super(rng);
+
+        // Both parameters must be strictly positive.
+        if (shape <= 0) {
+            throw new NotStrictlyPositiveException(LocalizedFormats.SHAPE, shape);
+        }
+        if (scale <= 0) {
+            throw new NotStrictlyPositiveException(LocalizedFormats.SCALE, scale);
+        }
+
+        this.shape = shape;
+        this.scale = scale;
+        this.solverAbsoluteAccuracy = inverseCumAccuracy;
+
+        // Everything below depends only on shape and scale, so it is computed once here and
+        // reused by density(double) and logDensity(double).
+        this.shiftedShape = shape + Gamma.LANCZOS_G + 0.5;
+        final double aux = FastMath.E / (2.0 * FastMath.PI * shiftedShape);
+        this.densityPrefactor2 = shape * FastMath.sqrt(aux) / Gamma.lanczos(shape);
+        this.logDensityPrefactor2 =
+                FastMath.log(shape) + 0.5 * FastMath.log(aux) - FastMath.log(Gamma.lanczos(shape));
+        // Prefactor 1 is prefactor 2 divided by scale and multiplied by
+        // shiftedShape^(-shape) * exp(shape + g); the log variant adds the matching logarithms.
+        this.densityPrefactor1 =
+                this.densityPrefactor2
+                        / scale
+                        * FastMath.pow(shiftedShape, -shape)
+                        * FastMath.exp(shape + Gamma.LANCZOS_G);
+        this.logDensityPrefactor1 =
+                this.logDensityPrefactor2
+                        - FastMath.log(scale)
+                        - FastMath.log(shiftedShape) * shape
+                        + shape
+                        + Gamma.LANCZOS_G;
+        // Thresholds on y = x / scale and on log(y) that density(double) and
+        // logDensity(double) test to choose between their two computation branches.
+        this.minY = shape + Gamma.LANCZOS_G - FastMath.log(Double.MAX_VALUE);
+        this.maxLogY = FastMath.log(Double.MAX_VALUE) / (shape - 1.0);
+    }
+
+    /**
+     * Returns the shape parameter of {@code this} distribution.
+     *
+     * @return the shape parameter
+     * @deprecated as of version 3.1, {@link #getShape()} should be preferred. This method will be
+     *     removed in version 4.0.
+     */
+    @Deprecated
+    public double getAlpha() {
+        return shape;
+    }
+
+    /**
+     * Returns the shape parameter of {@code this} distribution.
+     *
+     * @return the shape parameter
+     * @since 3.1
+     */
+    public double getShape() {
+        return shape;
+    }
+
+    /**
+     * Returns the scale parameter of {@code this} distribution.
+     *
+     * @return the scale parameter
+     * @deprecated as of version 3.1, {@link #getScale()} should be preferred. This method will be
+     *     removed in version 4.0.
+ */ + @Deprecated + public double getBeta() { + return scale; + } + + /** + * Returns the scale parameter of {@code this} distribution. + * + * @return the scale parameter + * @since 3.1 + */ + public double getScale() { + return scale; + } + + /** {@inheritDoc} */ + public double density(double x) { + /* The present method must return the value of + * + * 1 x a - x + * ---------- (-) exp(---) + * x Gamma(a) b b + * + * where a is the shape parameter, and b the scale parameter. + * Substituting the Lanczos approximation of Gamma(a) leads to the + * following expression of the density + * + * a e 1 y a + * - sqrt(------------------) ---- (-----------) exp(a - y + g), + * x 2 pi (a + g + 0.5) L(a) a + g + 0.5 + * + * where y = x / b. The above formula is the "natural" computation, which + * is implemented when no overflow is likely to occur. If overflow occurs + * with the natural computation, the following identity is used. It is + * based on the BOOST library + * http://www.boost.org/doc/libs/1_35_0/libs/math/doc/sf_and_dist/html/math_toolkit/special/sf_gamma/igamma.html + * Formula (15) needs adaptations, which are detailed below. + * + * y a + * (-----------) exp(a - y + g) + * a + g + 0.5 + * y - a - g - 0.5 y (g + 0.5) + * = exp(a log1pm(---------------) - ----------- + g), + * a + g + 0.5 a + g + 0.5 + * + * where log1pm(z) = log(1 + z) - z. Therefore, the value to be + * returned is + * + * a e 1 + * - sqrt(------------------) ---- + * x 2 pi (a + g + 0.5) L(a) + * y - a - g - 0.5 y (g + 0.5) + * * exp(a log1pm(---------------) - ----------- + g). + * a + g + 0.5 a + g + 0.5 + */ + if (x < 0) { + return 0; + } + final double y = x / scale; + if ((y <= minY) || (FastMath.log(y) >= maxLogY)) { + /* + * Overflow. 
+ */ + final double aux1 = (y - shiftedShape) / shiftedShape; + final double aux2 = shape * (FastMath.log1p(aux1) - aux1); + final double aux3 = + -y * (Gamma.LANCZOS_G + 0.5) / shiftedShape + Gamma.LANCZOS_G + aux2; + return densityPrefactor2 / x * FastMath.exp(aux3); + } + /* + * Natural calculation. + */ + return densityPrefactor1 * FastMath.exp(-y) * FastMath.pow(y, shape - 1); + } + + /** {@inheritDoc} * */ + @Override + public double logDensity(double x) { + /* + * see the comment in {@link #density(double)} for computation details + */ + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } + final double y = x / scale; + if ((y <= minY) || (FastMath.log(y) >= maxLogY)) { + /* + * Overflow. + */ + final double aux1 = (y - shiftedShape) / shiftedShape; + final double aux2 = shape * (FastMath.log1p(aux1) - aux1); + final double aux3 = + -y * (Gamma.LANCZOS_G + 0.5) / shiftedShape + Gamma.LANCZOS_G + aux2; + return logDensityPrefactor2 - FastMath.log(x) + aux3; + } + /* + * Natural calculation. + */ + return logDensityPrefactor1 - y + FastMath.log(y) * (shape - 1); + } + + /** + * {@inheritDoc} + * + * <p>The implementation of this method is based on: + * + * <ul> + * <li><a href="http://mathworld.wolfram.com/Chi-SquaredDistribution.html">Chi-Squared + * Distribution</a>, equation (9). + * <li>Casella, G., & Berger, R. (1990). <i>Statistical Inference</i>. Belmont, CA: Duxbury + * Press. + * </ul> + */ + public double cumulativeProbability(double x) { + double ret; + + if (x <= 0) { + ret = 0; + } else { + ret = Gamma.regularizedGammaP(shape, x / scale); + } + + return ret; + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For shape parameter {@code alpha} and scale parameter {@code beta}, the mean is {@code + * alpha * beta}. 
+     */
+    public double getNumericalMean() {
+        return shape * scale;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>For shape parameter {@code alpha} and scale parameter {@code beta}, the variance is {@code
+     * alpha * beta^2}.
+     *
+     * @return {@inheritDoc}
+     */
+    public double getNumericalVariance() {
+        return shape * scale * scale;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The lower bound of the support is always 0 no matter the parameters.
+     *
+     * @return lower bound of the support (always 0)
+     */
+    public double getSupportLowerBound() {
+        return 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The upper bound of the support is always positive infinity no matter the parameters.
+     *
+     * @return upper bound of the support (always Double.POSITIVE_INFINITY)
+     */
+    public double getSupportUpperBound() {
+        return Double.POSITIVE_INFINITY;
+    }
+
+    /** {@inheritDoc} */
+    public boolean isSupportLowerBoundInclusive() {
+        return true;
+    }
+
+    /** {@inheritDoc} */
+    public boolean isSupportUpperBoundInclusive() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The support of this distribution is connected.
+     *
+     * @return {@code true}
+     */
+    public boolean isSupportConnected() {
+        return true;
+    }
+
+    /**
+     * This implementation uses the following algorithms:
+     *
+     * <p>For {@code 0 < shape < 1}: <br>
+     * Ahrens, J. H. and Dieter, U., <i>Computer methods for sampling from gamma, beta, Poisson and
+     * binomial distributions.</i> Computing, 12, 223-246, 1974.
+     *
+     * <p>For {@code shape >= 1}: <br>
+     * Marsaglia and Tsang, <i>A Simple Method for Generating Gamma Variables.</i> ACM Transactions
+     * on Mathematical Software, Volume 26 Issue 3, September, 2000.
+     *
+     * <p>Both branches are rejection loops: candidates are drawn from the random generator until
+     * one passes the acceptance test, and the accepted value is multiplied by the scale.
+     *
+     * @return random value sampled from the Gamma(shape, scale) distribution
+     */
+    @Override
+    public double sample() {
+        if (shape < 1) {
+            // [1]: p. 228, Algorithm GS
+
+            while (true) {
+                // Step 1:
+                final double u = random.nextDouble();
+                // bGS depends only on shape, so it has the same value on every iteration.
+                final double bGS = 1 + shape / FastMath.E;
+                final double p = bGS * u;
+
+                if (p <= 1) {
+                    // Step 2:
+
+                    final double x = FastMath.pow(p, 1 / shape);
+                    final double u2 = random.nextDouble();
+
+                    if (u2 > FastMath.exp(-x)) {
+                        // Reject
+                        continue;
+                    } else {
+                        return scale * x;
+                    }
+                } else {
+                    // Step 3:
+
+                    final double x = -1 * FastMath.log((bGS - p) / shape);
+                    final double u2 = random.nextDouble();
+
+                    if (u2 > FastMath.pow(x, shape - 1)) {
+                        // Reject
+                        continue;
+                    } else {
+                        return scale * x;
+                    }
+                }
+            }
+        }
+
+        // Now shape >= 1
+
+        // d and c are fixed for the whole loop: d = shape - 1/3 and c = 1 / (3 * sqrt(d)).
+        final double d = shape - 0.333333333333333333;
+        final double c = 1 / (3 * FastMath.sqrt(d));
+
+        while (true) {
+            final double x = random.nextGaussian();
+            // v is (1 + c * x) cubed; candidates with v <= 0 are discarded before drawing u.
+            final double v = (1 + c * x) * (1 + c * x) * (1 + c * x);
+
+            if (v <= 0) {
+                continue;
+            }
+
+            final double x2 = x * x;
+            final double u = random.nextDouble();
+
+            // Squeeze: cheap acceptance test tried before the logarithm-based one below.
+            if (u < 1 - 0.0331 * x2 * x2) {
+                return scale * d * v;
+            }
+
+            if (FastMath.log(u) < 0.5 * x2 + d * (1 - v + FastMath.log(v))) {
+                return scale * d * v;
+            }
+        }
+    }
+}
diff --git a/src/main/java/org/apache/commons/math3/distribution/GeometricDistribution.java b/src/main/java/org/apache/commons/math3/distribution/GeometricDistribution.java
new file mode 100644
index 0000000..89c0a59
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/distribution/GeometricDistribution.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the geometric distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Geometric_distribution">Geometric distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/GeometricDistribution.html">Geometric Distribution + * (MathWorld)</a> + * @since 3.3 + */ +public class GeometricDistribution extends AbstractIntegerDistribution { + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20130507L; + + /** The probability of success. */ + private final double probabilityOfSuccess; + + /** {@code log(p)} where p is the probability of success. */ + private final double logProbabilityOfSuccess; + + /** {@code log(1 - p)} where p is the probability of success. */ + private final double log1mProbabilityOfSuccess; + + /** + * Create a geometric distribution with the given probability of success. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). 
In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param p probability of success. + * @throws OutOfRangeException if {@code p <= 0} or {@code p > 1}. + */ + public GeometricDistribution(double p) { + this(new Well19937c(), p); + } + + /** + * Creates a geometric distribution. + * + * @param rng Random number generator. + * @param p Probability of success. + * @throws OutOfRangeException if {@code p <= 0} or {@code p > 1}. + */ + public GeometricDistribution(RandomGenerator rng, double p) { + super(rng); + + if (p <= 0 || p > 1) { + throw new OutOfRangeException(LocalizedFormats.OUT_OF_RANGE_LEFT, p, 0, 1); + } + + probabilityOfSuccess = p; + logProbabilityOfSuccess = FastMath.log(p); + log1mProbabilityOfSuccess = FastMath.log1p(-p); + } + + /** + * Access the probability of success for this distribution. + * + * @return the probability of success. + */ + public double getProbabilityOfSuccess() { + return probabilityOfSuccess; + } + + /** {@inheritDoc} */ + public double probability(int x) { + if (x < 0) { + return 0.0; + } else { + return FastMath.exp(log1mProbabilityOfSuccess * x) * probabilityOfSuccess; + } + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } else { + return x * log1mProbabilityOfSuccess + logProbabilityOfSuccess; + } + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + if (x < 0) { + return 0.0; + } else { + return -FastMath.expm1(log1mProbabilityOfSuccess * (x + 1)); + } + } + + /** + * {@inheritDoc} + * + * <p>For probability parameter {@code p}, the mean is {@code (1 - p) / p}. 
+ */ + public double getNumericalMean() { + return (1 - probabilityOfSuccess) / probabilityOfSuccess; + } + + /** + * {@inheritDoc} + * + * <p>For probability parameter {@code p}, the variance is {@code (1 - p) / (p * p)}. + */ + public double getNumericalVariance() { + return (1 - probabilityOfSuccess) / (probabilityOfSuccess * probabilityOfSuccess); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0. + * + * @return lower bound of the support (always 0) + */ + public int getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is infinite (which we approximate as {@code + * Integer.MAX_VALUE}). + * + * @return upper bound of the support (always Integer.MAX_VALUE) + */ + public int getSupportUpperBound() { + return Integer.MAX_VALUE; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public int inverseCumulativeProbability(double p) throws OutOfRangeException { + if (p < 0 || p > 1) { + throw new OutOfRangeException(p, 0, 1); + } + if (p == 1) { + return Integer.MAX_VALUE; + } + if (p == 0) { + return 0; + } + return Math.max(0, (int) Math.ceil(FastMath.log1p(-p) / log1mProbabilityOfSuccess - 1)); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/GumbelDistribution.java b/src/main/java/org/apache/commons/math3/distribution/GumbelDistribution.java new file mode 100644 index 0000000..78280a5 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/GumbelDistribution.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * This class implements the Gumbel distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Gumbel_distribution">Gumbel Distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/GumbelDistribution.html">Gumbel Distribution + * (Mathworld)</a> + * @since 3.4 + */ +public class GumbelDistribution extends AbstractRealDistribution { + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20141003; + + /** + * Approximation of Euler's constant see + * http://mathworld.wolfram.com/Euler-MascheroniConstantApproximations.html + */ + private static final double EULER = FastMath.PI / (2 * FastMath.E); + + /** The location parameter. */ + private final double mu; + + /** The scale parameter. */ + private final double beta; + + /** + * Build a new instance. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mu location parameter + * @param beta scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public GumbelDistribution(double mu, double beta) { + this(new Well19937c(), mu, beta); + } + + /** + * Build a new instance. + * + * @param rng Random number generator + * @param mu location parameter + * @param beta scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public GumbelDistribution(RandomGenerator rng, double mu, double beta) { + super(rng); + + if (beta <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SCALE, beta); + } + + this.beta = beta; + this.mu = mu; + } + + /** + * Access the location parameter, {@code mu}. + * + * @return the location parameter. + */ + public double getLocation() { + return mu; + } + + /** + * Access the scale parameter, {@code beta}. + * + * @return the scale parameter. 
+ */ + public double getScale() { + return beta; + } + + /** {@inheritDoc} */ + public double density(double x) { + final double z = (x - mu) / beta; + final double t = FastMath.exp(-z); + return FastMath.exp(-z - t) / beta; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + final double z = (x - mu) / beta; + return FastMath.exp(-FastMath.exp(-z)); + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0.0, 1.0); + } else if (p == 0) { + return Double.NEGATIVE_INFINITY; + } else if (p == 1) { + return Double.POSITIVE_INFINITY; + } + return mu - FastMath.log(-FastMath.log(p)) * beta; + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return mu + EULER * beta; + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + return (MathUtils.PI_SQUARED) / 6.0 * (beta * beta); + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/HypergeometricDistribution.java b/src/main/java/org/apache/commons/math3/distribution/HypergeometricDistribution.java new file mode 100644 index 0000000..dece6c8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/HypergeometricDistribution.java @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the hypergeometric distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric + * distribution (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric + * distribution (MathWorld)</a> + */ +public class HypergeometricDistribution extends AbstractIntegerDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = -436928820673516179L; + + /** The number of successes in the population. */ + private final int numberOfSuccesses; + + /** The population size. */ + private final int populationSize; + + /** The sample size. 
*/ + private final int sampleSize; + + /** Cached numerical variance */ + private double numericalVariance = Double.NaN; + + /** Whether or not the numerical variance has been calculated */ + private boolean numericalVarianceIsCalculated = false; + + /** + * Construct a new hypergeometric distribution with the specified population size, number of + * successes in the population, and sample size. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param populationSize Population size. + * @param numberOfSuccesses Number of successes in the population. + * @param sampleSize Sample size. + * @throws NotPositiveException if {@code numberOfSuccesses < 0}. + * @throws NotStrictlyPositiveException if {@code populationSize <= 0}. + * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize}, or {@code + * sampleSize > populationSize}. + */ + public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize) + throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException { + this(new Well19937c(), populationSize, numberOfSuccesses, sampleSize); + } + + /** + * Creates a new hypergeometric distribution. + * + * @param rng Random number generator. + * @param populationSize Population size. + * @param numberOfSuccesses Number of successes in the population. + * @param sampleSize Sample size. + * @throws NotPositiveException if {@code numberOfSuccesses < 0}. + * @throws NotStrictlyPositiveException if {@code populationSize <= 0}. 
+ * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize}, or {@code + * sampleSize > populationSize}. + * @since 3.1 + */ + public HypergeometricDistribution( + RandomGenerator rng, int populationSize, int numberOfSuccesses, int sampleSize) + throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException { + super(rng); + + if (populationSize <= 0) { + throw new NotStrictlyPositiveException( + LocalizedFormats.POPULATION_SIZE, populationSize); + } + if (numberOfSuccesses < 0) { + throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES, numberOfSuccesses); + } + if (sampleSize < 0) { + throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES, sampleSize); + } + + if (numberOfSuccesses > populationSize) { + throw new NumberIsTooLargeException( + LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE, + numberOfSuccesses, + populationSize, + true); + } + if (sampleSize > populationSize) { + throw new NumberIsTooLargeException( + LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE, + sampleSize, + populationSize, + true); + } + + this.numberOfSuccesses = numberOfSuccesses; + this.populationSize = populationSize; + this.sampleSize = sampleSize; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + double ret; + + int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); + if (x < domain[0]) { + ret = 0.0; + } else if (x >= domain[1]) { + ret = 1.0; + } else { + ret = innerCumulativeProbability(domain[0], x, 1); + } + + return ret; + } + + /** + * Return the domain for the given hypergeometric distribution parameters. + * + * @param n Population size. + * @param m Number of successes in the population. + * @param k Sample size. + * @return a two element array containing the lower and upper bounds of the hypergeometric + * distribution. 
+ */ + private int[] getDomain(int n, int m, int k) { + return new int[] {getLowerDomain(n, m, k), getUpperDomain(m, k)}; + } + + /** + * Return the lowest domain value for the given hypergeometric distribution parameters. + * + * @param n Population size. + * @param m Number of successes in the population. + * @param k Sample size. + * @return the lowest domain value of the hypergeometric distribution. + */ + private int getLowerDomain(int n, int m, int k) { + return FastMath.max(0, m - (n - k)); + } + + /** + * Access the number of successes. + * + * @return the number of successes. + */ + public int getNumberOfSuccesses() { + return numberOfSuccesses; + } + + /** + * Access the population size. + * + * @return the population size. + */ + public int getPopulationSize() { + return populationSize; + } + + /** + * Access the sample size. + * + * @return the sample size. + */ + public int getSampleSize() { + return sampleSize; + } + + /** + * Return the highest domain value for the given hypergeometric distribution parameters. + * + * @param m Number of successes in the population. + * @param k Sample size. + * @return the highest domain value of the hypergeometric distribution. + */ + private int getUpperDomain(int m, int k) { + return FastMath.min(k, m); + } + + /** {@inheritDoc} */ + public double probability(int x) { + final double logProbability = logProbability(x); + return logProbability == Double.NEGATIVE_INFINITY ? 
0 : FastMath.exp(logProbability); + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + double ret; + + int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); + if (x < domain[0] || x > domain[1]) { + ret = Double.NEGATIVE_INFINITY; + } else { + double p = (double) sampleSize / (double) populationSize; + double q = (double) (populationSize - sampleSize) / (double) populationSize; + double p1 = SaddlePointExpansion.logBinomialProbability(x, numberOfSuccesses, p, q); + double p2 = + SaddlePointExpansion.logBinomialProbability( + sampleSize - x, populationSize - numberOfSuccesses, p, q); + double p3 = + SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q); + ret = p1 + p2 - p3; + } + + return ret; + } + + /** + * For this distribution, {@code X}, this method returns {@code P(X >= x)}. + * + * @param x Value at which the CDF is evaluated. + * @return the upper tail CDF for this distribution. + * @since 1.1 + */ + public double upperCumulativeProbability(int x) { + double ret; + + final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); + if (x <= domain[0]) { + ret = 1.0; + } else if (x > domain[1]) { + ret = 0.0; + } else { + ret = innerCumulativeProbability(domain[1], x, -1); + } + + return ret; + } + + /** + * For this distribution, {@code X}, this method returns {@code P(x0 <= X <= x1)}. This + * probability is computed by summing the point probabilities for the values {@code x0, x0 + 1, + * x0 + 2, ..., x1}, in the order directed by {@code dx}. + * + * @param x0 Inclusive lower bound. + * @param x1 Inclusive upper bound. + * @param dx Direction of summation (1 indicates summing from x0 to x1, and 0 indicates summing + * from x1 to x0). + * @return {@code P(x0 <= X <= x1)}. 
+ */ + private double innerCumulativeProbability(int x0, int x1, int dx) { + double ret = probability(x0); + while (x0 != x1) { + x0 += dx; + ret += probability(x0); + } + return ret; + } + + /** + * {@inheritDoc} + * + * <p>For population size {@code N}, number of successes {@code m}, and sample size {@code n}, + * the mean is {@code n * m / N}. + */ + public double getNumericalMean() { + return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize()); + } + + /** + * {@inheritDoc} + * + * <p>For population size {@code N}, number of successes {@code m}, and sample size {@code n}, + * the variance is {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}. + */ + public double getNumericalVariance() { + if (!numericalVarianceIsCalculated) { + numericalVariance = calculateNumericalVariance(); + numericalVarianceIsCalculated = true; + } + return numericalVariance; + } + + /** + * Used by {@link #getNumericalVariance()}. + * + * @return the variance of this distribution + */ + protected double calculateNumericalVariance() { + final double N = getPopulationSize(); + final double m = getNumberOfSuccesses(); + final double n = getSampleSize(); + return (n * m * (N - n) * (N - m)) / (N * N * (N - 1)); + } + + /** + * {@inheritDoc} + * + * <p>For population size {@code N}, number of successes {@code m}, and sample size {@code n}, + * the lower bound of the support is {@code max(0, n + m - N)}. + * + * @return lower bound of the support + */ + public int getSupportLowerBound() { + return FastMath.max(0, getSampleSize() + getNumberOfSuccesses() - getPopulationSize()); + } + + /** + * {@inheritDoc} + * + * <p>For number of successes {@code m} and sample size {@code n}, the upper bound of the + * support is {@code min(m, n)}. + * + * @return upper bound of the support + */ + public int getSupportUpperBound() { + return FastMath.min(getNumberOfSuccesses(), getSampleSize()); + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/IntegerDistribution.java b/src/main/java/org/apache/commons/math3/distribution/IntegerDistribution.java new file mode 100644 index 0000000..c188a78 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/IntegerDistribution.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.OutOfRangeException; + +/** Interface for distributions on the integers. */ +public interface IntegerDistribution { + /** + * For a random variable {@code X} whose values are distributed according to this distribution, + * this method returns {@code P(X = x)}. In other words, this method represents the probability + * mass function (PMF) for the distribution. 
/** Interface for distributions on the integers. */
public interface IntegerDistribution {
    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(X = x)}. In other words, this method represents the probability
     * mass function (PMF) for the distribution.
     *
     * @param x the point at which the PMF is evaluated
     * @return the value of the probability mass function at {@code x}
     */
    double probability(int x);

    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(X <= x)}. In other words, this method represents the
     * (cumulative) distribution function (CDF) for this distribution.
     *
     * @param x the point at which the CDF is evaluated
     * @return the probability that a random variable with this distribution takes a value less than
     *     or equal to {@code x}
     */
    double cumulativeProbability(int x);

    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(x0 < X <= x1)}.
     *
     * @param x0 the exclusive lower bound
     * @param x1 the inclusive upper bound
     * @return the probability that a random variable with this distribution will take a value
     *     between {@code x0} and {@code x1}, excluding the lower and including the upper endpoint
     * @throws NumberIsTooLargeException if {@code x0 > x1}
     */
    double cumulativeProbability(int x0, int x1) throws NumberIsTooLargeException;

    /**
     * Computes the quantile function of this distribution. For a random variable {@code X}
     * distributed according to this distribution, the returned value is
     *
     * <ul>
     *   <li><code>inf{x in Z | P(X&lt;=x) &gt;= p}</code> for {@code 0 < p <= 1},
     *   <li><code>inf{x in Z | P(X&lt;=x) &gt; 0}</code> for {@code p = 0}.
     * </ul>
     *
     * If the result exceeds the range of the data type {@code int}, then {@code Integer.MIN_VALUE}
     * or {@code Integer.MAX_VALUE} is returned.
     *
     * @param p the cumulative probability
     * @return the smallest {@code p}-quantile of this distribution (largest 0-quantile for {@code p
     *     = 0})
     * @throws OutOfRangeException if {@code p < 0} or {@code p > 1}
     */
    int inverseCumulativeProbability(double p) throws OutOfRangeException;

    /**
     * Use this method to get the numerical value of the mean of this distribution.
     *
     * @return the mean or {@code Double.NaN} if it is not defined
     */
    double getNumericalMean();

    /**
     * Use this method to get the numerical value of the variance of this distribution.
     *
     * @return the variance (possibly {@code Double.POSITIVE_INFINITY} or {@code Double.NaN} if it
     *     is not defined)
     */
    double getNumericalVariance();

    /**
     * Access the lower bound of the support. This method must return the same value as {@code
     * inverseCumulativeProbability(0)}. In other words, this method must return
     *
     * <p><code>inf {x in Z | P(X &lt;= x) &gt; 0}</code>.
     *
     * @return lower bound of the support ({@code Integer.MIN_VALUE} for negative infinity)
     */
    int getSupportLowerBound();

    /**
     * Access the upper bound of the support. This method must return the same value as {@code
     * inverseCumulativeProbability(1)}. In other words, this method must return
     *
     * <p><code>inf {x in Z | P(X &lt;= x) = 1}</code>.
     *
     * @return upper bound of the support ({@code Integer.MAX_VALUE} for positive infinity)
     */
    int getSupportUpperBound();

    /**
     * Use this method to get information about whether the support is connected, i.e. whether all
     * integers between the lower and upper bound of the support are included in the support.
     *
     * @return whether the support is connected or not
     */
    boolean isSupportConnected();

    /**
     * Reseed the random generator used to generate samples.
     *
     * @param seed the new seed
     * @since 3.0
     */
    void reseedRandomGenerator(long seed);

    /**
     * Generate a random value sampled from this distribution.
     *
     * @return a random value
     * @since 3.0
     */
    int sample();

    /**
     * Generate a random sample from the distribution.
     *
     * @param sampleSize the number of random values to generate
     * @return an array representing the random sample
     * @throws org.apache.commons.math3.exception.NotStrictlyPositiveException if {@code sampleSize}
     *     is not positive
     * @since 3.0
     */
    int[] sample(int sampleSize);
}
+ */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.MathArithmeticException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.fraction.BigFraction; +import org.apache.commons.math3.fraction.BigFractionField; +import org.apache.commons.math3.fraction.FractionConversionException; +import org.apache.commons.math3.linear.Array2DRowFieldMatrix; +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.FieldMatrix; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.util.FastMath; + +import java.io.Serializable; +import java.math.BigDecimal; + +/** + * Implementation of the Kolmogorov-Smirnov distribution. + * + * <p>Treats the distribution of the two-sided {@code P(D_n < d)} where {@code D_n = sup_x |G(x) - + * G_n (x)|} for the theoretical cdf {@code G} and the empirical cdf {@code G_n}. + * + * <p>This implementation is based on [1] with certain quick decisions for extreme values given in + * [2]. + * + * <p>In short, when wanting to evaluate {@code P(D_n < d)}, the method in [1] is to write {@code d + * = (k - h) / n} for positive integer {@code k} and {@code 0 <= h < 1}. Then {@code P(D_n < d) = + * (n! / n^n) * t_kk}, where {@code t_kk} is the {@code (k, k)}'th entry in the special matrix + * {@code H^n}, i.e. {@code H} to the {@code n}'th power. 
+ * + * <p>References: + * + * <ul> + * <li>[1] <a href="http://www.jstatsoft.org/v08/i18/">Evaluating Kolmogorov's Distribution</a> by + * George Marsaglia, Wai Wan Tsang, and Jingbo Wang + * <li>[2] <a href="http://www.jstatsoft.org/v39/i11/">Computing the Two-Sided Kolmogorov-Smirnov + * Distribution</a> by Richard Simard and Pierre L'Ecuyer + * </ul> + * + * Note that [1] contains an error in computing h, refer to <a + * href="https://issues.apache.org/jira/browse/MATH-437">MATH-437</a> for details. + * + * @see <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov-Smirnov test + * (Wikipedia)</a> + * @deprecated to be removed in version 4.0 - use {@link + * org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest} + */ +public class KolmogorovSmirnovDistribution implements Serializable { + + /** Serializable version identifier. */ + private static final long serialVersionUID = -4670676796862967187L; + + /** Number of observations. */ + private int n; + + /** + * @param n Number of observations + * @throws NotStrictlyPositiveException if {@code n <= 0} + */ + public KolmogorovSmirnovDistribution(int n) throws NotStrictlyPositiveException { + if (n <= 0) { + throw new NotStrictlyPositiveException( + LocalizedFormats.NOT_POSITIVE_NUMBER_OF_SAMPLES, n); + } + + this.n = n; + } + + /** + * Calculates {@code P(D_n < d)} using method described in [1] with quick decisions for extreme + * values given in [2] (see above). The result is not exact as with {@link + * KolmogorovSmirnovDistribution#cdfExact(double)} because calculations are based on {@code + * double} rather than {@link org.apache.commons.math3.fraction.BigFraction}. 
+ * + * @param d statistic + * @return the two-sided probability of {@code P(D_n < d)} + * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link + * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) + * / m} for integer {@code k, m} and {@code 0 <= h < 1}. + */ + public double cdf(double d) throws MathArithmeticException { + return this.cdf(d, false); + } + + /** + * Calculates {@code P(D_n < d)} using method described in [1] with quick decisions for extreme + * values given in [2] (see above). The result is exact in the sense that BigFraction/BigReal is + * used everywhere at the expense of very slow execution time. Almost never choose this in real + * applications unless you are very sure; this is almost solely for verification purposes. + * Normally, you would choose {@link KolmogorovSmirnovDistribution#cdf(double)} + * + * @param d statistic + * @return the two-sided probability of {@code P(D_n < d)} + * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link + * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) + * / m} for integer {@code k, m} and {@code 0 <= h < 1}. + */ + public double cdfExact(double d) throws MathArithmeticException { + return this.cdf(d, true); + } + + /** + * Calculates {@code P(D_n < d)} using method described in [1] with quick decisions for extreme + * values given in [2] (see above). + * + * @param d statistic + * @param exact whether the probability should be calculated exact using {@link + * org.apache.commons.math3.fraction.BigFraction} everywhere at the expense of very slow + * execution time, or if {@code double} should be used convenient places to gain speed. + * Almost never choose {@code true} in real applications unless you are very sure; {@code + * true} is almost solely for verification purposes. 
+ * @return the two-sided probability of {@code P(D_n < d)} + * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link + * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) + * / m} for integer {@code k, m} and {@code 0 <= h < 1}. + */ + public double cdf(double d, boolean exact) throws MathArithmeticException { + + final double ninv = 1 / ((double) n); + final double ninvhalf = 0.5 * ninv; + + if (d <= ninvhalf) { + + return 0; + + } else if (ninvhalf < d && d <= ninv) { + + double res = 1; + double f = 2 * d - ninv; + + // n! f^n = n*f * (n-1)*f * ... * 1*x + for (int i = 1; i <= n; ++i) { + res *= i * f; + } + + return res; + + } else if (1 - ninv <= d && d < 1) { + + return 1 - 2 * FastMath.pow(1 - d, n); + + } else if (1 <= d) { + + return 1; + } + + return exact ? exactK(d) : roundedK(d); + } + + /** + * Calculates the exact value of {@code P(D_n < d)} using method described in [1] and {@link + * org.apache.commons.math3.fraction.BigFraction} (see above). + * + * @param d statistic + * @return the two-sided probability of {@code P(D_n < d)} + * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link + * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) + * / m} for integer {@code k, m} and {@code 0 <= h < 1}. + */ + private double exactK(double d) throws MathArithmeticException { + + final int k = (int) FastMath.ceil(n * d); + + final FieldMatrix<BigFraction> H = this.createH(d); + final FieldMatrix<BigFraction> Hpower = H.power(n); + + BigFraction pFrac = Hpower.getEntry(k - 1, k - 1); + + for (int i = 1; i <= n; ++i) { + pFrac = pFrac.multiply(i).divide(n); + } + + /* + * BigFraction.doubleValue converts numerator to double and the + * denominator to double and divides afterwards. That gives NaN quite + * easy. 
This does not (scale is the number of digits): + */ + return pFrac.bigDecimalValue(20, BigDecimal.ROUND_HALF_UP).doubleValue(); + } + + /** + * Calculates {@code P(D_n < d)} using method described in [1] and doubles (see above). + * + * @param d statistic + * @return the two-sided probability of {@code P(D_n < d)} + * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link + * org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) + * / m} for integer {@code k, m} and {@code 0 <= h < 1}. + */ + private double roundedK(double d) throws MathArithmeticException { + + final int k = (int) FastMath.ceil(n * d); + final FieldMatrix<BigFraction> HBigFraction = this.createH(d); + final int m = HBigFraction.getRowDimension(); + + /* + * Here the rounding part comes into play: use + * RealMatrix instead of FieldMatrix<BigFraction> + */ + final RealMatrix H = new Array2DRowRealMatrix(m, m); + + for (int i = 0; i < m; ++i) { + for (int j = 0; j < m; ++j) { + H.setEntry(i, j, HBigFraction.getEntry(i, j).doubleValue()); + } + } + + final RealMatrix Hpower = H.power(n); + + double pFrac = Hpower.getEntry(k - 1, k - 1); + + for (int i = 1; i <= n; ++i) { + pFrac *= (double) i / (double) n; + } + + return pFrac; + } + + /*** + * Creates {@code H} of size {@code m x m} as described in [1] (see above). + * + * @param d statistic + * @return H matrix + * @throws NumberIsTooLargeException if fractional part is greater than 1 + * @throws FractionConversionException if algorithm fails to convert + * {@code h} to a {@link org.apache.commons.math3.fraction.BigFraction} in + * expressing {@code d} as {@code (k - h) / m} for integer {@code k, m} and + * {@code 0 <= h < 1}. 
+ */ + private FieldMatrix<BigFraction> createH(double d) + throws NumberIsTooLargeException, FractionConversionException { + + int k = (int) FastMath.ceil(n * d); + + int m = 2 * k - 1; + double hDouble = k - n * d; + + if (hDouble >= 1) { + throw new NumberIsTooLargeException(hDouble, 1.0, false); + } + + BigFraction h = null; + + try { + h = new BigFraction(hDouble, 1.0e-20, 10000); + } catch (FractionConversionException e1) { + try { + h = new BigFraction(hDouble, 1.0e-10, 10000); + } catch (FractionConversionException e2) { + h = new BigFraction(hDouble, 1.0e-5, 10000); + } + } + + final BigFraction[][] Hdata = new BigFraction[m][m]; + + /* + * Start by filling everything with either 0 or 1. + */ + for (int i = 0; i < m; ++i) { + for (int j = 0; j < m; ++j) { + if (i - j + 1 < 0) { + Hdata[i][j] = BigFraction.ZERO; + } else { + Hdata[i][j] = BigFraction.ONE; + } + } + } + + /* + * Setting up power-array to avoid calculating the same value twice: + * hPowers[0] = h^1 ... hPowers[m-1] = h^m + */ + final BigFraction[] hPowers = new BigFraction[m]; + hPowers[0] = h; + for (int i = 1; i < m; ++i) { + hPowers[i] = h.multiply(hPowers[i - 1]); + } + + /* + * First column and last row has special values (each other reversed). + */ + for (int i = 0; i < m; ++i) { + Hdata[i][0] = Hdata[i][0].subtract(hPowers[i]); + Hdata[m - 1][i] = Hdata[m - 1][i].subtract(hPowers[m - i - 1]); + } + + /* + * [1] states: "For 1/2 < h < 1 the bottom left element of the matrix + * should be (1 - 2*h^m + (2h - 1)^m )/m!" Since 0 <= h < 1, then if h > + * 1/2 is sufficient to check: + */ + if (h.compareTo(BigFraction.ONE_HALF) == 1) { + Hdata[m - 1][0] = Hdata[m - 1][0].add(h.multiply(2).subtract(1).pow(m)); + } + + /* + * Aside from the first column and last row, the (i, j)-th element is + * 1/(i - j + 1)! if i - j + 1 >= 0, else 0. 1's and 0's are already + * put, so only division with (i - j + 1)! is needed in the elements + * that have 1's. There is no need to calculate (i - j + 1)! 
and then + * divide - small steps avoid overflows. + * + * Note that i - j + 1 > 0 <=> i + 1 > j instead of j'ing all the way to + * m. Also note that it is started at g = 2 because dividing by 1 isn't + * really necessary. + */ + for (int i = 0; i < m; ++i) { + for (int j = 0; j < i + 1; ++j) { + if (i - j + 1 > 0) { + for (int g = 2; g <= i - j + 1; ++g) { + Hdata[i][j] = Hdata[i][j].divide(g); + } + } + } + } + + return new Array2DRowFieldMatrix<BigFraction>(BigFractionField.getInstance(), Hdata); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/LaplaceDistribution.java b/src/main/java/org/apache/commons/math3/distribution/LaplaceDistribution.java new file mode 100644 index 0000000..2ce36fc --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/LaplaceDistribution.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * This class implements the Laplace distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Laplace_distribution">Laplace distribution + * (Wikipedia)</a> + * @since 3.4 + */ +public class LaplaceDistribution extends AbstractRealDistribution { + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20141003; + + /** The location parameter. */ + private final double mu; + + /** The scale parameter. */ + private final double beta; + + /** + * Build a new instance. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mu location parameter + * @param beta scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public LaplaceDistribution(double mu, double beta) { + this(new Well19937c(), mu, beta); + } + + /** + * Build a new instance. 
+ * + * @param rng Random number generator + * @param mu location parameter + * @param beta scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public LaplaceDistribution(RandomGenerator rng, double mu, double beta) { + super(rng); + + if (beta <= 0.0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NOT_POSITIVE_SCALE, beta); + } + + this.mu = mu; + this.beta = beta; + } + + /** + * Access the location parameter, {@code mu}. + * + * @return the location parameter. + */ + public double getLocation() { + return mu; + } + + /** + * Access the scale parameter, {@code beta}. + * + * @return the scale parameter. + */ + public double getScale() { + return beta; + } + + /** {@inheritDoc} */ + public double density(double x) { + return FastMath.exp(-FastMath.abs(x - mu) / beta) / (2.0 * beta); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + if (x <= mu) { + return FastMath.exp((x - mu) / beta) / 2.0; + } else { + return 1.0 - FastMath.exp((mu - x) / beta) / 2.0; + } + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0.0, 1.0); + } else if (p == 0) { + return Double.NEGATIVE_INFINITY; + } else if (p == 1) { + return Double.POSITIVE_INFINITY; + } + double x = (p > 0.5) ? 
-Math.log(2.0 - 2.0 * p) : Math.log(2.0 * p); + return mu + beta * x; + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return mu; + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + return 2.0 * beta * beta; + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/LevyDistribution.java b/src/main/java/org/apache/commons/math3/distribution/LevyDistribution.java new file mode 100644 index 0000000..d76e993 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/LevyDistribution.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Erf; +import org.apache.commons.math3.util.FastMath; + +/** + * This class implements the <a href="http://en.wikipedia.org/wiki/L%C3%A9vy_distribution"> + * Lévy distribution</a>. + * + * @since 3.2 + */ +public class LevyDistribution extends AbstractRealDistribution { + + /** Serializable UID. */ + private static final long serialVersionUID = 20130314L; + + /** Location parameter. */ + private final double mu; + + /** Scale parameter. */ + private final double c; // Setting this to 1 returns a cumProb of 1.0 + + /** Half of c (for calculations). */ + private final double halfC; + + /** + * Build a new instance. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mu location parameter + * @param c scale parameter + * @since 3.4 + */ + public LevyDistribution(final double mu, final double c) { + this(new Well19937c(), mu, c); + } + + /** + * Creates a LevyDistribution. 
+ * + * @param rng random generator to be used for sampling + * @param mu location + * @param c scale parameter + */ + public LevyDistribution(final RandomGenerator rng, final double mu, final double c) { + super(rng); + this.mu = mu; + this.c = c; + this.halfC = 0.5 * c; + } + + /** + * {@inheritDoc} + * + * <p>From Wikipedia: The probability density function of the Lévy distribution over the + * domain is + * + * <pre> + * f(x; μ, c) = √(c / 2π) * e<sup>-c / 2 (x - μ)</sup> / (x - μ)<sup>3/2</sup> + * </pre> + * + * <p>For this distribution, {@code X}, this method returns {@code P(X < x)}. If {@code x} is + * less than location parameter μ, {@code Double.NaN} is returned, as in these cases the + * distribution is not defined. + */ + public double density(final double x) { + if (x < mu) { + return Double.NaN; + } + + final double delta = x - mu; + final double f = halfC / delta; + return FastMath.sqrt(f / FastMath.PI) * FastMath.exp(-f) / delta; + } + + /** + * {@inheritDoc} + * + * <p>See documentation of {@link #density(double)} for computation details. + */ + @Override + public double logDensity(double x) { + if (x < mu) { + return Double.NaN; + } + + final double delta = x - mu; + final double f = halfC / delta; + return 0.5 * FastMath.log(f / FastMath.PI) - f - FastMath.log(delta); + } + + /** + * {@inheritDoc} + * + * <p>From Wikipedia: the cumulative distribution function is + * + * <pre> + * f(x; u, c) = erfc (√ (c / 2 (x - u ))) + * </pre> + */ + public double cumulativeProbability(final double x) { + if (x < mu) { + return Double.NaN; + } + return Erf.erfc(FastMath.sqrt(halfC / (x - mu))); + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + final double t = Erf.erfcInv(p); + return mu + halfC / (t * t); + } + + /** + * Get the scale parameter of the distribution. 
+ * + * @return scale parameter of the distribution + */ + public double getScale() { + return c; + } + + /** + * Get the location parameter of the distribution. + * + * @return location parameter of the distribution + */ + public double getLocation() { + return mu; + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return mu; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + // there is a division by x-mu in the computation, so density + // is not finite at lower bound, bound must be excluded + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + // upper bound is infinite, so it must be excluded + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/LogNormalDistribution.java b/src/main/java/org/apache/commons/math3/distribution/LogNormalDistribution.java new file mode 100644 index 0000000..e6a6deb --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/LogNormalDistribution.java @@ -0,0 +1,349 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Erf; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the log-normal (gaussian) distribution. + * + * <p><strong>Parameters:</strong> {@code X} is log-normally distributed if its natural logarithm + * {@code log(X)} is normally distributed. The probability distribution function of {@code X} is + * given by (for {@code x > 0}) + * + * <p>{@code exp(-0.5 * ((ln(x) - m) / s)^2) / (s * sqrt(2 * pi) * x)} + * + * <ul> + * <li>{@code m} is the <em>scale</em> parameter: this is the mean of the normally distributed + * natural logarithm of this distribution, + * <li>{@code s} is the <em>shape</em> parameter: this is the standard deviation of the normally + * distributed natural logarithm of this distribution. + * </ul> + * + * @see <a href="http://en.wikipedia.org/wiki/Log-normal_distribution">Log-normal distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/LogNormalDistribution.html">Log Normal distribution + * (MathWorld)</a> + * @since 3.0 + */ +public class LogNormalDistribution extends AbstractRealDistribution { + /** Default inverse cumulative probability accuracy. 
*/ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20120112; + + /** √(2 π) */ + private static final double SQRT2PI = FastMath.sqrt(2 * FastMath.PI); + + /** √(2) */ + private static final double SQRT2 = FastMath.sqrt(2.0); + + /** The scale parameter of this distribution. */ + private final double scale; + + /** The shape parameter of this distribution. */ + private final double shape; + + /** The value of {@code log(shape) + 0.5 * log(2*PI)} stored for faster computation. */ + private final double logShapePlusHalfLog2Pi; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** + * Create a log-normal distribution, where the mean and standard deviation of the {@link + * NormalDistribution normally distributed} natural logarithm of the log-normal distribution are + * equal to zero and one respectively. In other words, the scale of the returned distribution is + * {@code 0}, while its shape is {@code 1}. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + */ + public LogNormalDistribution() { + this(0, 1); + } + + /** + * Create a log-normal distribution using the specified scale and shape. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). 
In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param scale the scale parameter of this distribution + * @param shape the shape parameter of this distribution + * @throws NotStrictlyPositiveException if {@code shape <= 0}. + */ + public LogNormalDistribution(double scale, double shape) throws NotStrictlyPositiveException { + this(scale, shape, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a log-normal distribution using the specified scale, shape and inverse cumulative + * distribution accuracy. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param scale the scale parameter of this distribution + * @param shape the shape parameter of this distribution + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code shape <= 0}. + */ + public LogNormalDistribution(double scale, double shape, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + this(new Well19937c(), scale, shape, inverseCumAccuracy); + } + + /** + * Creates a log-normal distribution. + * + * @param rng Random number generator. + * @param scale Scale parameter of this distribution. + * @param shape Shape parameter of this distribution. + * @throws NotStrictlyPositiveException if {@code shape <= 0}. 
+ * @since 3.3 + */ + public LogNormalDistribution(RandomGenerator rng, double scale, double shape) + throws NotStrictlyPositiveException { + this(rng, scale, shape, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a log-normal distribution. + * + * @param rng Random number generator. + * @param scale Scale parameter of this distribution. + * @param shape Shape parameter of this distribution. + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code shape <= 0}. + * @since 3.1 + */ + public LogNormalDistribution( + RandomGenerator rng, double scale, double shape, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (shape <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SHAPE, shape); + } + + this.scale = scale; + this.shape = shape; + this.logShapePlusHalfLog2Pi = FastMath.log(shape) + 0.5 * FastMath.log(2 * FastMath.PI); + this.solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Returns the scale parameter of this distribution. + * + * @return the scale parameter + */ + public double getScale() { + return scale; + } + + /** + * Returns the shape parameter of this distribution. + * + * @return the shape parameter + */ + public double getShape() { + return shape; + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code m}, and shape {@code s} of this distribution, the PDF is given by + * + * <ul> + * <li>{@code 0} if {@code x <= 0}, + * <li>{@code exp(-0.5 * ((ln(x) - m) / s)^2) / (s * sqrt(2 * pi) * x)} otherwise. + * </ul> + */ + public double density(double x) { + if (x <= 0) { + return 0; + } + final double x0 = FastMath.log(x) - scale; + final double x1 = x0 / shape; + return FastMath.exp(-0.5 * x1 * x1) / (shape * SQRT2PI * x); + } + + /** + * {@inheritDoc} + * + * <p>See documentation of {@link #density(double)} for computation details. 
+ */ + @Override + public double logDensity(double x) { + if (x <= 0) { + return Double.NEGATIVE_INFINITY; + } + final double logX = FastMath.log(x); + final double x0 = logX - scale; + final double x1 = x0 / shape; + return -0.5 * x1 * x1 - (logShapePlusHalfLog2Pi + logX); + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code m}, and shape {@code s} of this distribution, the CDF is given by + * + * <ul> + * <li>{@code 0} if {@code x <= 0}, + * <li>{@code 0} if {@code ln(x) - m < 0} and {@code m - ln(x) > 40 * s}, as in these cases + * the actual value is within {@code Double.MIN_VALUE} of 0, + * <li>{@code 1} if {@code ln(x) - m >= 0} and {@code ln(x) - m > 40 * s}, as in these cases + * the actual value is within {@code Double.MIN_VALUE} of 1, + * <li>{@code 0.5 + 0.5 * erf((ln(x) - m) / (s * sqrt(2))} otherwise. + * </ul> + */ + public double cumulativeProbability(double x) { + if (x <= 0) { + return 0; + } + final double dev = FastMath.log(x) - scale; + if (FastMath.abs(dev) > 40 * shape) { + return dev < 0 ? 
0.0d : 1.0d; + } + return 0.5 + 0.5 * Erf.erf(dev / (shape * SQRT2)); + } + + /** + * {@inheritDoc} + * + * @deprecated See {@link RealDistribution#cumulativeProbability(double,double)} + */ + @Override + @Deprecated + public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException { + return probability(x0, x1); + } + + /** {@inheritDoc} */ + @Override + public double probability(double x0, double x1) throws NumberIsTooLargeException { + if (x0 > x1) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT, x0, x1, true); + } + if (x0 <= 0 || x1 <= 0) { + return super.probability(x0, x1); + } + final double denom = shape * SQRT2; + final double v0 = (FastMath.log(x0) - scale) / denom; + final double v1 = (FastMath.log(x1) - scale) / denom; + return 0.5 * Erf.erf(v0, v1); + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code m} and shape {@code s}, the mean is {@code exp(m + s^2 / 2)}. + */ + public double getNumericalMean() { + double s = shape; + return FastMath.exp(scale + (s * s / 2)); + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code m} and shape {@code s}, the variance is {@code (exp(s^2) - 1) * exp(2 * m + * + s^2)}. + */ + public double getNumericalVariance() { + final double s = shape; + final double ss = s * s; + return (FastMath.expm1(ss)) * FastMath.exp(2 * scale + ss); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the parameters. + * + * @return lower bound of the support (always 0) + */ + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. 
+ * + * @return upper bound of the support (always {@code Double.POSITIVE_INFINITY}) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + final double n = random.nextGaussian(); + return FastMath.exp(scale + shape * n); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/LogisticDistribution.java b/src/main/java/org/apache/commons/math3/distribution/LogisticDistribution.java new file mode 100644 index 0000000..d2e7504 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/LogisticDistribution.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * This class implements the Logistic distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Logistic_distribution">Logistic Distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/LogisticDistribution.html">Logistic Distribution + * (Mathworld)</a> + * @since 3.4 + */ +public class LogisticDistribution extends AbstractRealDistribution { + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20141003; + + /** The location parameter. */ + private final double mu; + + /** The scale parameter. */ + private final double s; + + /** + * Build a new instance. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mu location parameter + * @param s scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public LogisticDistribution(double mu, double s) { + this(new Well19937c(), mu, s); + } + + /** + * Build a new instance. 
+ * + * @param rng Random number generator + * @param mu location parameter + * @param s scale parameter (must be positive) + * @throws NotStrictlyPositiveException if {@code beta <= 0} + */ + public LogisticDistribution(RandomGenerator rng, double mu, double s) { + super(rng); + + if (s <= 0.0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NOT_POSITIVE_SCALE, s); + } + + this.mu = mu; + this.s = s; + } + + /** + * Access the location parameter, {@code mu}. + * + * @return the location parameter. + */ + public double getLocation() { + return mu; + } + + /** + * Access the scale parameter, {@code s}. + * + * @return the scale parameter. + */ + public double getScale() { + return s; + } + + /** {@inheritDoc} */ + public double density(double x) { + double z = (x - mu) / s; + double v = FastMath.exp(-z); + return 1 / s * v / ((1.0 + v) * (1.0 + v)); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + double z = 1 / s * (x - mu); + return 1.0 / (1.0 + FastMath.exp(-z)); + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0.0, 1.0); + } else if (p == 0) { + return 0.0; + } else if (p == 1) { + return Double.POSITIVE_INFINITY; + } + return s * Math.log(p / (1.0 - p)) + mu; + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return mu; + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + return (MathUtils.PI_SQUARED / 3.0) * (1.0 / (s * s)); + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ 
+ public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateNormalDistribution.java b/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateNormalDistribution.java new file mode 100644 index 0000000..547d349 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateNormalDistribution.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.util.Pair; + +import java.util.ArrayList; +import java.util.List; + +/** + * Multivariate normal mixture distribution. This class is mainly syntactic sugar. + * + * @see MixtureMultivariateRealDistribution + * @since 3.2 + */ +public class MixtureMultivariateNormalDistribution + extends MixtureMultivariateRealDistribution<MultivariateNormalDistribution> { + + /** + * Creates a multivariate normal mixture distribution. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link + * org.apache.commons.math3.random.Well19937c Well19937c} as random generator to be used for + * sampling only (see {@link #sample()} and {@link #sample(int)}). In case no sampling is needed + * for the created distribution, it is advised to pass {@code null} as random generator via the + * appropriate constructors to avoid the additional initialisation overhead. + * + * @param weights Weights of each component. + * @param means Mean vector for each component. + * @param covariances Covariance matrix for each component. + */ + public MixtureMultivariateNormalDistribution( + double[] weights, double[][] means, double[][][] covariances) { + super(createComponents(weights, means, covariances)); + } + + /** + * Creates a mixture model from a list of distributions and their associated weights. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link + * org.apache.commons.math3.random.Well19937c Well19937c} as random generator to be used for + * sampling only (see {@link #sample()} and {@link #sample(int)}). In case no sampling is needed + * for the created distribution, it is advised to pass {@code null} as random generator via the + * appropriate constructors to avoid the additional initialisation overhead. + * + * @param components List of (weight, distribution) pairs from which to sample. + */ + public MixtureMultivariateNormalDistribution( + List<Pair<Double, MultivariateNormalDistribution>> components) { + super(components); + } + + /** + * Creates a mixture model from a list of distributions and their associated weights. + * + * @param rng Random number generator. + * @param components Distributions from which to sample. + * @throws NotPositiveException if any of the weights is negative. + * @throws DimensionMismatchException if not all components have the same number of variables. 
+ */ + public MixtureMultivariateNormalDistribution( + RandomGenerator rng, List<Pair<Double, MultivariateNormalDistribution>> components) + throws NotPositiveException, DimensionMismatchException { + super(rng, components); + } + + /** + * @param weights Weights of each component. + * @param means Mean vector for each component. + * @param covariances Covariance matrix for each component. + * @return the list of components. + */ + private static List<Pair<Double, MultivariateNormalDistribution>> createComponents( + double[] weights, double[][] means, double[][][] covariances) { + final List<Pair<Double, MultivariateNormalDistribution>> mvns = + new ArrayList<Pair<Double, MultivariateNormalDistribution>>(weights.length); + + for (int i = 0; i < weights.length; i++) { + final MultivariateNormalDistribution dist = + new MultivariateNormalDistribution(means[i], covariances[i]); + + mvns.add(new Pair<Double, MultivariateNormalDistribution>(weights[i], dist)); + } + + return mvns; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateRealDistribution.java new file mode 100644 index 0000000..4c65b75 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/MixtureMultivariateRealDistribution.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.MathArithmeticException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.Pair; + +import java.util.ArrayList; +import java.util.List; + +/** + * Class for representing <a href="http://en.wikipedia.org/wiki/Mixture_model">mixture model</a> + * distributions. + * + * @param <T> Type of the mixture components. + * @since 3.1 + */ +public class MixtureMultivariateRealDistribution<T extends MultivariateRealDistribution> + extends AbstractMultivariateRealDistribution { + /** Normalized weight of each mixture component. */ + private final double[] weight; + + /** Mixture components. */ + private final List<T> distribution; + + /** + * Creates a mixture model from a list of distributions and their associated weights. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. 
+ * + * @param components List of (weight, distribution) pairs from which to sample. + */ + public MixtureMultivariateRealDistribution(List<Pair<Double, T>> components) { + this(new Well19937c(), components); + } + + /** + * Creates a mixture model from a list of distributions and their associated weights. + * + * @param rng Random number generator. + * @param components Distributions from which to sample. + * @throws NotPositiveException if any of the weights is negative. + * @throws DimensionMismatchException if not all components have the same number of variables. + */ + public MixtureMultivariateRealDistribution( + RandomGenerator rng, List<Pair<Double, T>> components) { + super(rng, components.get(0).getSecond().getDimension()); + + final int numComp = components.size(); + final int dim = getDimension(); + double weightSum = 0; + for (int i = 0; i < numComp; i++) { + final Pair<Double, T> comp = components.get(i); + if (comp.getSecond().getDimension() != dim) { + throw new DimensionMismatchException(comp.getSecond().getDimension(), dim); + } + if (comp.getFirst() < 0) { + throw new NotPositiveException(comp.getFirst()); + } + weightSum += comp.getFirst(); + } + + // Check for overflow. + if (Double.isInfinite(weightSum)) { + throw new MathArithmeticException(LocalizedFormats.OVERFLOW); + } + + // Store each distribution and its normalized weight. + distribution = new ArrayList<T>(); + weight = new double[numComp]; + for (int i = 0; i < numComp; i++) { + final Pair<Double, T> comp = components.get(i); + weight[i] = comp.getFirst() / weightSum; + distribution.add(comp.getSecond()); + } + } + + /** {@inheritDoc} */ + public double density(final double[] values) { + double p = 0; + for (int i = 0; i < weight.length; i++) { + p += weight[i] * distribution.get(i).density(values); + } + return p; + } + + /** {@inheritDoc} */ + @Override + public double[] sample() { + // Sampled values. + double[] vals = null; + + // Determine which component to sample from. 
+ final double randomValue = random.nextDouble(); + double sum = 0; + + for (int i = 0; i < weight.length; i++) { + sum += weight[i]; + if (randomValue <= sum) { + // pick model i + vals = distribution.get(i).sample(); + break; + } + } + + if (vals == null) { + // This should never happen, but it ensures we won't return a null in + // case the loop above has some floating point inequality problem on + // the final iteration. + vals = distribution.get(weight.length - 1).sample(); + } + + return vals; + } + + /** {@inheritDoc} */ + @Override + public void reseedRandomGenerator(long seed) { + // Seed needs to be propagated to underlying components + // in order to maintain consistency between runs. + super.reseedRandomGenerator(seed); + + for (int i = 0; i < distribution.size(); i++) { + // Make each component's seed different in order to avoid + // using the same sequence of random numbers. + distribution.get(i).reseedRandomGenerator(i + 1 + seed); + } + } + + /** + * Gets the distributions that make up the mixture model. + * + * @return the component distributions and associated weights. + */ + public List<Pair<Double, T>> getComponents() { + final List<Pair<Double, T>> list = new ArrayList<Pair<Double, T>>(weight.length); + + for (int i = 0; i < weight.length; i++) { + list.add(new Pair<Double, T>(weight[i], distribution.get(i))); + } + + return list; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/MultivariateNormalDistribution.java b/src/main/java/org/apache/commons/math3/distribution/MultivariateNormalDistribution.java new file mode 100644 index 0000000..388761a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/MultivariateNormalDistribution.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.EigenDecomposition; +import org.apache.commons.math3.linear.NonPositiveDefiniteMatrixException; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.SingularMatrixException; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathArrays; + +/** + * Implementation of the multivariate normal (Gaussian) distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Multivariate_normal_distribution">Multivariate normal + * distribution (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/MultivariateNormalDistribution.html">Multivariate + * normal distribution (MathWorld)</a> + * @since 3.1 + */ +public class MultivariateNormalDistribution extends AbstractMultivariateRealDistribution { + /** Vector of means. */ + private final double[] means; + + /** Covariance matrix. */ + private final RealMatrix covarianceMatrix; + + /** The matrix inverse of the covariance matrix. */ + private final RealMatrix covarianceMatrixInverse; + + /** The determinant of the covariance matrix. 
*/ + private final double covarianceMatrixDeterminant; + + /** Matrix used in computation of samples. */ + private final RealMatrix samplingMatrix; + + /** + * Creates a multivariate normal distribution with the given mean vector and covariance matrix. + * <br> + * The number of dimensions is equal to the length of the mean vector and to the number of rows + * and columns of the covariance matrix. It is frequently written as "p" in formulae. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param means Vector of means. + * @param covariances Covariance matrix. + * @throws DimensionMismatchException if the arrays length are inconsistent. + * @throws SingularMatrixException if the eigenvalue decomposition cannot be performed on the + * provided covariance matrix. + * @throws NonPositiveDefiniteMatrixException if any of the eigenvalues is negative. + */ + public MultivariateNormalDistribution(final double[] means, final double[][] covariances) + throws SingularMatrixException, + DimensionMismatchException, + NonPositiveDefiniteMatrixException { + this(new Well19937c(), means, covariances); + } + + /** + * Creates a multivariate normal distribution with the given mean vector and covariance matrix. + * <br> + * The number of dimensions is equal to the length of the mean vector and to the number of rows + * and columns of the covariance matrix. It is frequently written as "p" in formulae. + * + * @param rng Random Number Generator. + * @param means Vector of means. + * @param covariances Covariance matrix. + * @throws DimensionMismatchException if the arrays length are inconsistent. 
+ * @throws SingularMatrixException if the eigenvalue decomposition cannot be performed on the + * provided covariance matrix. + * @throws NonPositiveDefiniteMatrixException if any of the eigenvalues is negative. + */ + public MultivariateNormalDistribution( + RandomGenerator rng, final double[] means, final double[][] covariances) + throws SingularMatrixException, + DimensionMismatchException, + NonPositiveDefiniteMatrixException { + super(rng, means.length); + + final int dim = means.length; + + if (covariances.length != dim) { + throw new DimensionMismatchException(covariances.length, dim); + } + + for (int i = 0; i < dim; i++) { + if (dim != covariances[i].length) { + throw new DimensionMismatchException(covariances[i].length, dim); + } + } + + this.means = MathArrays.copyOf(means); + + covarianceMatrix = new Array2DRowRealMatrix(covariances); + + // Covariance matrix eigen decomposition. + final EigenDecomposition covMatDec = new EigenDecomposition(covarianceMatrix); + + // Compute and store the inverse. + covarianceMatrixInverse = covMatDec.getSolver().getInverse(); + // Compute and store the determinant. + covarianceMatrixDeterminant = covMatDec.getDeterminant(); + + // Eigenvalues of the covariance matrix. + final double[] covMatEigenvalues = covMatDec.getRealEigenvalues(); + + for (int i = 0; i < covMatEigenvalues.length; i++) { + if (covMatEigenvalues[i] < 0) { + throw new NonPositiveDefiniteMatrixException(covMatEigenvalues[i], i, 0); + } + } + + // Matrix where each column is an eigenvector of the covariance matrix. + final Array2DRowRealMatrix covMatEigenvectors = new Array2DRowRealMatrix(dim, dim); + for (int v = 0; v < dim; v++) { + final double[] evec = covMatDec.getEigenvector(v).toArray(); + covMatEigenvectors.setColumn(v, evec); + } + + final RealMatrix tmpMatrix = covMatEigenvectors.transpose(); + + // Scale each eigenvector by the square root of its eigenvalue. 
+ for (int row = 0; row < dim; row++) { + final double factor = FastMath.sqrt(covMatEigenvalues[row]); + for (int col = 0; col < dim; col++) { + tmpMatrix.multiplyEntry(row, col, factor); + } + } + + samplingMatrix = covMatEigenvectors.multiply(tmpMatrix); + } + + /** + * Gets the mean vector. + * + * @return the mean vector. + */ + public double[] getMeans() { + return MathArrays.copyOf(means); + } + + /** + * Gets the covariance matrix. + * + * @return the covariance matrix. + */ + public RealMatrix getCovariances() { + return covarianceMatrix.copy(); + } + + /** {@inheritDoc} */ + public double density(final double[] vals) throws DimensionMismatchException { + final int dim = getDimension(); + if (vals.length != dim) { + throw new DimensionMismatchException(vals.length, dim); + } + + return FastMath.pow(2 * FastMath.PI, -0.5 * dim) + * FastMath.pow(covarianceMatrixDeterminant, -0.5) + * getExponentTerm(vals); + } + + /** + * Gets the square root of each element on the diagonal of the covariance matrix. + * + * @return the standard deviations. + */ + public double[] getStandardDeviations() { + final int dim = getDimension(); + final double[] std = new double[dim]; + final double[][] s = covarianceMatrix.getData(); + for (int i = 0; i < dim; i++) { + std[i] = FastMath.sqrt(s[i][i]); + } + return std; + } + + /** {@inheritDoc} */ + @Override + public double[] sample() { + final int dim = getDimension(); + final double[] normalVals = new double[dim]; + + for (int i = 0; i < dim; i++) { + normalVals[i] = random.nextGaussian(); + } + + final double[] vals = samplingMatrix.operate(normalVals); + + for (int i = 0; i < dim; i++) { + vals[i] += means[i]; + } + + return vals; + } + + /** + * Computes the term used in the exponent (see definition of the distribution). + * + * @param values Values at which to compute density. + * @return the multiplication factor of density calculations. 
+ */ + private double getExponentTerm(final double[] values) { + final double[] centered = new double[values.length]; + for (int i = 0; i < centered.length; i++) { + centered[i] = values[i] - getMeans()[i]; + } + final double[] preMultiplied = covarianceMatrixInverse.preMultiply(centered); + double sum = 0; + for (int i = 0; i < preMultiplied.length; i++) { + sum += preMultiplied[i] * centered[i]; + } + return FastMath.exp(-0.5 * sum); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/MultivariateRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/MultivariateRealDistribution.java new file mode 100644 index 0000000..050cfd5 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/MultivariateRealDistribution.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; + +/** + * Base interface for multivariate distributions on the reals. 
/**
 * Base interface for multivariate distributions on the reals.
 *
 * <p>This is based largely on the RealDistribution interface, but cumulative distribution functions
 * are not required because they are often quite difficult to compute for multivariate
 * distributions.
 *
 * @since 3.1
 */
public interface MultivariateRealDistribution {
    /**
     * Returns the probability density function (PDF) of this distribution evaluated at the
     * specified point {@code x}. In general, the PDF is the derivative of the cumulative
     * distribution function. If the derivative does not exist at {@code x}, then an appropriate
     * replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY}, {@code Double.NaN}, or
     * the limit inferior or limit superior of the difference quotient.
     *
     * @param x Point at which the PDF is evaluated.
     * @return the value of the probability density function at point {@code x}.
     */
    double density(double[] x);

    /**
     * Reseeds the random generator used to generate samples.
     *
     * @param seed Seed with which to initialize the random number generator.
     */
    void reseedRandomGenerator(long seed);

    /**
     * Gets the number of random variables of the distribution. It is the size of the array returned
     * by the {@link #sample() sample} method.
     *
     * @return the number of variables.
     */
    int getDimension();

    /**
     * Generates a random value vector sampled from this distribution.
     *
     * @return a random value vector.
     */
    double[] sample();

    /**
     * Generates an array of random value vectors sampled from this distribution.
     *
     * @param sampleSize the number of random vectors to generate.
     * @return an array of random vectors, one per requested sample.
     * @throws org.apache.commons.math3.exception.NotStrictlyPositiveException if {@code sampleSize}
     *     is not positive.
     * @see #sample()
     */
    double[][] sample(int sampleSize) throws NotStrictlyPositiveException;
}
+ * + * @see <a href="http://en.wikipedia.org/wiki/Nakagami_distribution">Nakagami Distribution + * (Wikipedia)</a> + * @since 3.4 + */ +public class NakagamiDistribution extends AbstractRealDistribution { + + /** Default inverse cumulative probability accuracy. */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20141003; + + /** The shape parameter. */ + private final double mu; + + /** The scale parameter. */ + private final double omega; + + /** Inverse cumulative probability accuracy. */ + private final double inverseAbsoluteAccuracy; + + /** + * Build a new instance. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mu shape parameter + * @param omega scale parameter (must be positive) + * @throws NumberIsTooSmallException if {@code mu < 0.5} + * @throws NotStrictlyPositiveException if {@code omega <= 0} + */ + public NakagamiDistribution(double mu, double omega) { + this(mu, omega, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Build a new instance. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. 
+ * + * @param mu shape parameter + * @param omega scale parameter (must be positive) + * @param inverseAbsoluteAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NumberIsTooSmallException if {@code mu < 0.5} + * @throws NotStrictlyPositiveException if {@code omega <= 0} + */ + public NakagamiDistribution(double mu, double omega, double inverseAbsoluteAccuracy) { + this(new Well19937c(), mu, omega, inverseAbsoluteAccuracy); + } + + /** + * Build a new instance. + * + * @param rng Random number generator + * @param mu shape parameter + * @param omega scale parameter (must be positive) + * @param inverseAbsoluteAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NumberIsTooSmallException if {@code mu < 0.5} + * @throws NotStrictlyPositiveException if {@code omega <= 0} + */ + public NakagamiDistribution( + RandomGenerator rng, double mu, double omega, double inverseAbsoluteAccuracy) { + super(rng); + + if (mu < 0.5) { + throw new NumberIsTooSmallException(mu, 0.5, true); + } + if (omega <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NOT_POSITIVE_SCALE, omega); + } + + this.mu = mu; + this.omega = omega; + this.inverseAbsoluteAccuracy = inverseAbsoluteAccuracy; + } + + /** + * Access the shape parameter, {@code mu}. + * + * @return the shape parameter. + */ + public double getShape() { + return mu; + } + + /** + * Access the scale parameter, {@code omega}. + * + * @return the scale parameter. 
+ */ + public double getScale() { + return omega; + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return inverseAbsoluteAccuracy; + } + + /** {@inheritDoc} */ + public double density(double x) { + if (x <= 0) { + return 0.0; + } + return 2.0 + * FastMath.pow(mu, mu) + / (Gamma.gamma(mu) * FastMath.pow(omega, mu)) + * FastMath.pow(x, 2 * mu - 1) + * FastMath.exp(-mu * x * x / omega); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + return Gamma.regularizedGammaP(mu, mu * x * x / omega); + } + + /** {@inheritDoc} */ + public double getNumericalMean() { + return Gamma.gamma(mu + 0.5) / Gamma.gamma(mu) * FastMath.sqrt(omega / mu); + } + + /** {@inheritDoc} */ + public double getNumericalVariance() { + double v = Gamma.gamma(mu + 0.5) / Gamma.gamma(mu); + return omega * (1 - 1 / mu * v * v); + } + + /** {@inheritDoc} */ + public double getSupportLowerBound() { + return 0; + } + + /** {@inheritDoc} */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/NormalDistribution.java b/src/main/java/org/apache/commons/math3/distribution/NormalDistribution.java new file mode 100644 index 0000000..a2bab56 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/NormalDistribution.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Erf; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the normal (gaussian) distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Normal_distribution">Normal distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/NormalDistribution.html">Normal distribution + * (MathWorld)</a> + */ +public class NormalDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 8589540077390120676L; + + /** √(2) */ + private static final double SQRT2 = FastMath.sqrt(2.0); + + /** Mean of this distribution. */ + private final double mean; + + /** Standard deviation of this distribution. 
*/ + private final double standardDeviation; + + /** The value of {@code log(sd) + 0.5*log(2*pi)} stored for faster computation. */ + private final double logStandardDeviationPlusHalfLog2Pi; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** + * Create a normal distribution with mean equal to zero and standard deviation equal to one. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + */ + public NormalDistribution() { + this(0, 1); + } + + /** + * Create a normal distribution using the given mean and standard deviation. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mean Mean for this distribution. + * @param sd Standard deviation for this distribution. + * @throws NotStrictlyPositiveException if {@code sd <= 0}. + */ + public NormalDistribution(double mean, double sd) throws NotStrictlyPositiveException { + this(mean, sd, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a normal distribution using the given mean, standard deviation and inverse cumulative + * distribution accuracy. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param mean Mean for this distribution. + * @param sd Standard deviation for this distribution. + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code sd <= 0}. + * @since 2.1 + */ + public NormalDistribution(double mean, double sd, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + this(new Well19937c(), mean, sd, inverseCumAccuracy); + } + + /** + * Creates a normal distribution. + * + * @param rng Random number generator. + * @param mean Mean for this distribution. + * @param sd Standard deviation for this distribution. + * @throws NotStrictlyPositiveException if {@code sd <= 0}. + * @since 3.3 + */ + public NormalDistribution(RandomGenerator rng, double mean, double sd) + throws NotStrictlyPositiveException { + this(rng, mean, sd, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a normal distribution. + * + * @param rng Random number generator. + * @param mean Mean for this distribution. + * @param sd Standard deviation for this distribution. + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code sd <= 0}. 
+ * @since 3.1 + */ + public NormalDistribution( + RandomGenerator rng, double mean, double sd, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (sd <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.STANDARD_DEVIATION, sd); + } + + this.mean = mean; + standardDeviation = sd; + logStandardDeviationPlusHalfLog2Pi = FastMath.log(sd) + 0.5 * FastMath.log(2 * FastMath.PI); + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Access the mean. + * + * @return the mean for this distribution. + */ + public double getMean() { + return mean; + } + + /** + * Access the standard deviation. + * + * @return the standard deviation for this distribution. + */ + public double getStandardDeviation() { + return standardDeviation; + } + + /** {@inheritDoc} */ + public double density(double x) { + return FastMath.exp(logDensity(x)); + } + + /** {@inheritDoc} */ + @Override + public double logDensity(double x) { + final double x0 = x - mean; + final double x1 = x0 / standardDeviation; + return -0.5 * x1 * x1 - logStandardDeviationPlusHalfLog2Pi; + } + + /** + * {@inheritDoc} + * + * <p>If {@code x} is more than 40 standard deviations from the mean, 0 or 1 is returned, as in + * these cases the actual value is within {@code Double.MIN_VALUE} of 0 or 1. + */ + public double cumulativeProbability(double x) { + final double dev = x - mean; + if (FastMath.abs(dev) > 40 * standardDeviation) { + return dev < 0 ? 
0.0d : 1.0d; + } + return 0.5 * Erf.erfc(-dev / (standardDeviation * SQRT2)); + } + + /** + * {@inheritDoc} + * + * @since 3.2 + */ + @Override + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + return mean + standardDeviation * SQRT2 * Erf.erfInv(2 * p - 1); + } + + /** + * {@inheritDoc} + * + * @deprecated See {@link RealDistribution#cumulativeProbability(double,double)} + */ + @Override + @Deprecated + public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException { + return probability(x0, x1); + } + + /** {@inheritDoc} */ + @Override + public double probability(double x0, double x1) throws NumberIsTooLargeException { + if (x0 > x1) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT, x0, x1, true); + } + final double denom = standardDeviation * SQRT2; + final double v0 = (x0 - mean) / denom; + final double v1 = (x1 - mean) / denom; + return 0.5 * Erf.erf(v0, v1); + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For mean parameter {@code mu}, the mean is {@code mu}. + */ + public double getNumericalMean() { + return getMean(); + } + + /** + * {@inheritDoc} + * + * <p>For standard deviation parameter {@code s}, the variance is {@code s^2}. + */ + public double getNumericalVariance() { + final double s = getStandardDeviation(); + return s * s; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always negative infinity no matter the parameters. + * + * @return lower bound of the support (always {@code Double.NEGATIVE_INFINITY}) + */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. 
+ * + * @return upper bound of the support (always {@code Double.POSITIVE_INFINITY}) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + return standardDeviation * random.nextGaussian() + mean; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/ParetoDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ParetoDistribution.java new file mode 100644 index 0000000..c4d5d58 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/ParetoDistribution.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Pareto distribution. + * + * <p><strong>Parameters:</strong> The probability distribution function of {@code X} is given by + * (for {@code x >= k}): + * + * <pre> + * α * k^α / x^(α + 1) + * </pre> + * + * <p> + * + * <ul> + * <li>{@code k} is the <em>scale</em> parameter: this is the minimum possible value of {@code X}, + * <li>{@code α} is the <em>shape</em> parameter: this is the Pareto index + * </ul> + * + * @see <a href="http://en.wikipedia.org/wiki/Pareto_distribution">Pareto distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/ParetoDistribution.html">Pareto distribution + * (MathWorld)</a> + * @since 3.3 + */ +public class ParetoDistribution extends AbstractRealDistribution { + + /** Default inverse cumulative probability accuracy. */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20130424; + + /** The scale parameter of this distribution. */ + private final double scale; + + /** The shape parameter of this distribution. */ + private final double shape; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** Create a Pareto distribution with a scale of {@code 1} and a shape of {@code 1}. */ + public ParetoDistribution() { + this(1, 1); + } + + /** + * Create a Pareto distribution using the specified scale and shape. 
+ * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param scale the scale parameter of this distribution + * @param shape the shape parameter of this distribution + * @throws NotStrictlyPositiveException if {@code scale <= 0} or {@code shape <= 0}. + */ + public ParetoDistribution(double scale, double shape) throws NotStrictlyPositiveException { + this(scale, shape, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a Pareto distribution using the specified scale, shape and inverse cumulative + * distribution accuracy. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param scale the scale parameter of this distribution + * @param shape the shape parameter of this distribution + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code scale <= 0} or {@code shape <= 0}. + */ + public ParetoDistribution(double scale, double shape, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + this(new Well19937c(), scale, shape, inverseCumAccuracy); + } + + /** + * Creates a Pareto distribution. + * + * @param rng Random number generator. + * @param scale Scale parameter of this distribution. + * @param shape Shape parameter of this distribution. 
+ * @throws NotStrictlyPositiveException if {@code scale <= 0} or {@code shape <= 0}. + */ + public ParetoDistribution(RandomGenerator rng, double scale, double shape) + throws NotStrictlyPositiveException { + this(rng, scale, shape, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a Pareto distribution. + * + * @param rng Random number generator. + * @param scale Scale parameter of this distribution. + * @param shape Shape parameter of this distribution. + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NotStrictlyPositiveException if {@code scale <= 0} or {@code shape <= 0}. + */ + public ParetoDistribution( + RandomGenerator rng, double scale, double shape, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (scale <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SCALE, scale); + } + + if (shape <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SHAPE, shape); + } + + this.scale = scale; + this.shape = shape; + this.solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Returns the scale parameter of this distribution. + * + * @return the scale parameter + */ + public double getScale() { + return scale; + } + + /** + * Returns the shape parameter of this distribution. + * + * @return the shape parameter + */ + public double getShape() { + return shape; + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code k}, and shape {@code α} of this distribution, the PDF is given by + * + * <ul> + * <li>{@code 0} if {@code x < k}, + * <li>{@code α * k^α / x^(α + 1)} otherwise. + * </ul> + */ + public double density(double x) { + if (x < scale) { + return 0; + } + return FastMath.pow(scale, shape) / FastMath.pow(x, shape + 1) * shape; + } + + /** + * {@inheritDoc} + * + * <p>See documentation of {@link #density(double)} for computation details. 
+ */ + @Override + public double logDensity(double x) { + if (x < scale) { + return Double.NEGATIVE_INFINITY; + } + return FastMath.log(scale) * shape - FastMath.log(x) * (shape + 1) + FastMath.log(shape); + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code k}, and shape {@code α} of this distribution, the CDF is given by + * + * <ul> + * <li>{@code 0} if {@code x < k}, + * <li>{@code 1 - (k / x)^α} otherwise. + * </ul> + */ + public double cumulativeProbability(double x) { + if (x <= scale) { + return 0; + } + return 1 - FastMath.pow(scale / x, shape); + } + + /** + * {@inheritDoc} + * + * @deprecated See {@link RealDistribution#cumulativeProbability(double,double)} + */ + @Override + @Deprecated + public double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException { + return probability(x0, x1); + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code k} and shape {@code α}, the mean is given by + * + * <ul> + * <li>{@code ∞} if {@code α <= 1}, + * <li>{@code α * k / (α - 1)} otherwise. + * </ul> + */ + public double getNumericalMean() { + if (shape <= 1) { + return Double.POSITIVE_INFINITY; + } + return shape * scale / (shape - 1); + } + + /** + * {@inheritDoc} + * + * <p>For scale {@code k} and shape {@code α}, the variance is given by + * + * <ul> + * <li>{@code ∞} if {@code 1 < α <= 2}, + * <li>{@code k^2 * α / ((α - 1)^2 * (α - 2))} otherwise. + * </ul> + */ + public double getNumericalVariance() { + if (shape <= 2) { + return Double.POSITIVE_INFINITY; + } + double s = shape - 1; + return scale * scale * shape / (s * s) / (shape - 2); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is equal to the scale parameter {@code k}. 
+ * + * @return lower bound of the support + */ + public double getSupportLowerBound() { + return scale; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. + * + * @return upper bound of the support (always {@code Double.POSITIVE_INFINITY}) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + final double n = random.nextDouble(); + return scale / FastMath.pow(n, 1 / shape); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/PascalDistribution.java b/src/main/java/org/apache/commons/math3/distribution/PascalDistribution.java new file mode 100644 index 0000000..c850f8f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/PascalDistribution.java @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Beta; +import org.apache.commons.math3.util.CombinatoricsUtils; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Pascal distribution. The Pascal distribution is a special case of the + * Negative Binomial distribution where the number of successes parameter is an integer. + * + * <p>There are various ways to express the probability mass and distribution functions for the + * Pascal distribution. The present implementation represents the distribution of the number of + * failures before {@code r} successes occur. This is the convention adopted in e.g. <a + * href="http://mathworld.wolfram.com/NegativeBinomialDistribution.html">MathWorld</a>, but + * <em>not</em> in <a + * href="http://en.wikipedia.org/wiki/Negative_binomial_distribution">Wikipedia</a>. + * + * <p>For a random variable {@code X} whose values are distributed according to this distribution, + * the probability mass function is given by<br> + * {@code P(X = k) = C(k + r - 1, r - 1) * p^r * (1 - p)^k,}<br> + * where {@code r} is the number of successes, {@code p} is the probability of success, and {@code + * X} is the total number of failures. {@code C(n, k)} is the binomial coefficient ({@code n} choose + * {@code k}). 
The mean and variance of {@code X} are<br> + * {@code E(X) = (1 - p) * r / p, var(X) = (1 - p) * r / p^2.}<br> + * Finally, the cumulative distribution function is given by<br> + * {@code P(X <= k) = I(p, r, k + 1)}, where I is the regularized incomplete Beta function. + * + * @see <a href="http://en.wikipedia.org/wiki/Negative_binomial_distribution">Negative binomial + * distribution (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/NegativeBinomialDistribution.html">Negative binomial + * distribution (MathWorld)</a> + * @since 1.2 (changed to concrete class in 3.0) + */ +public class PascalDistribution extends AbstractIntegerDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = 6751309484392813623L; + + /** The number of successes. */ + private final int numberOfSuccesses; + + /** The probability of success. */ + private final double probabilityOfSuccess; + + /** + * The value of {@code log(p)}, where {@code p} is the probability of success, stored for faster + * computation. + */ + private final double logProbabilityOfSuccess; + + /** + * The value of {@code log(1-p)}, where {@code p} is the probability of success, stored for + * faster computation. + */ + private final double log1mProbabilityOfSuccess; + + /** + * Create a Pascal distribution with the given number of successes and probability of success. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param r Number of successes. + * @param p Probability of success. 
+ * @throws NotStrictlyPositiveException if the number of successes is not positive + * @throws OutOfRangeException if the probability of success is not in the range {@code [0, 1]}. + */ + public PascalDistribution(int r, double p) + throws NotStrictlyPositiveException, OutOfRangeException { + this(new Well19937c(), r, p); + } + + /** + * Create a Pascal distribution with the given number of successes and probability of success. + * + * @param rng Random number generator. + * @param r Number of successes. + * @param p Probability of success. + * @throws NotStrictlyPositiveException if the number of successes is not positive + * @throws OutOfRangeException if the probability of success is not in the range {@code [0, 1]}. + * @since 3.1 + */ + public PascalDistribution(RandomGenerator rng, int r, double p) + throws NotStrictlyPositiveException, OutOfRangeException { + super(rng); + + if (r <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES, r); + } + if (p < 0 || p > 1) { + throw new OutOfRangeException(p, 0, 1); + } + + numberOfSuccesses = r; + probabilityOfSuccess = p; + logProbabilityOfSuccess = FastMath.log(p); + log1mProbabilityOfSuccess = FastMath.log1p(-p); + } + + /** + * Access the number of successes for this distribution. + * + * @return the number of successes. + */ + public int getNumberOfSuccesses() { + return numberOfSuccesses; + } + + /** + * Access the probability of success for this distribution. + * + * @return the probability of success. 
+ */ + public double getProbabilityOfSuccess() { + return probabilityOfSuccess; + } + + /** {@inheritDoc} */ + public double probability(int x) { + double ret; + if (x < 0) { + ret = 0.0; + } else { + ret = + CombinatoricsUtils.binomialCoefficientDouble( + x + numberOfSuccesses - 1, numberOfSuccesses - 1) + * FastMath.pow(probabilityOfSuccess, numberOfSuccesses) + * FastMath.pow(1.0 - probabilityOfSuccess, x); + } + return ret; + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + double ret; + if (x < 0) { + ret = Double.NEGATIVE_INFINITY; + } else { + ret = + CombinatoricsUtils.binomialCoefficientLog( + x + numberOfSuccesses - 1, numberOfSuccesses - 1) + + logProbabilityOfSuccess * numberOfSuccesses + + log1mProbabilityOfSuccess * x; + } + return ret; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + double ret; + if (x < 0) { + ret = 0.0; + } else { + ret = Beta.regularizedBeta(probabilityOfSuccess, numberOfSuccesses, x + 1.0); + } + return ret; + } + + /** + * {@inheritDoc} + * + * <p>For number of successes {@code r} and probability of success {@code p}, the mean is {@code + * r * (1 - p) / p}. + */ + public double getNumericalMean() { + final double p = getProbabilityOfSuccess(); + final double r = getNumberOfSuccesses(); + return (r * (1 - p)) / p; + } + + /** + * {@inheritDoc} + * + * <p>For number of successes {@code r} and probability of success {@code p}, the variance is + * {@code r * (1 - p) / p^2}. + */ + public double getNumericalVariance() { + final double p = getProbabilityOfSuccess(); + final double r = getNumberOfSuccesses(); + return r * (1 - p) / (p * p); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the parameters. 
+ * + * @return lower bound of the support (always 0) + */ + public int getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. + * Positive infinity is symbolized by {@code Integer.MAX_VALUE}. + * + * @return upper bound of the support (always {@code Integer.MAX_VALUE} for positive infinity) + */ + public int getSupportUpperBound() { + return Integer.MAX_VALUE; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/PoissonDistribution.java b/src/main/java/org/apache/commons/math3/distribution/PoissonDistribution.java new file mode 100644 index 0000000..7d9eab3 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/PoissonDistribution.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.CombinatoricsUtils; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * Implementation of the Poisson distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Poisson_distribution">Poisson distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution + * (MathWorld)</a> + */ +public class PoissonDistribution extends AbstractIntegerDistribution { + /** + * Default maximum number of iterations for cumulative probability calculations. + * + * @since 2.1 + */ + public static final int DEFAULT_MAX_ITERATIONS = 10000000; + + /** + * Default convergence criterion. + * + * @since 2.1 + */ + public static final double DEFAULT_EPSILON = 1e-12; + + /** Serializable version identifier. */ + private static final long serialVersionUID = -3349935121172596109L; + + /** Distribution used to compute normal approximation. */ + private final NormalDistribution normal; + + /** Distribution needed for the {@link #sample()} method. */ + private final ExponentialDistribution exponential; + + /** Mean of the distribution. */ + private final double mean; + + /** + * Maximum number of iterations for cumulative probability. Cumulative probabilities are + * estimated using either Lanczos series approximation of {@link Gamma#regularizedGammaP(double, + * double, double, int)} or continued fraction approximation of {@link + * Gamma#regularizedGammaQ(double, double, double, int)}. + */ + private final int maxIterations; + + /** Convergence criterion for cumulative probability. 
*/ + private final double epsilon; + + /** + * Creates a new Poisson distribution with specified mean. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param p the Poisson mean + * @throws NotStrictlyPositiveException if {@code p <= 0}. + */ + public PoissonDistribution(double p) throws NotStrictlyPositiveException { + this(p, DEFAULT_EPSILON, DEFAULT_MAX_ITERATIONS); + } + + /** + * Creates a new Poisson distribution with specified mean, convergence criterion and maximum + * number of iterations. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param p Poisson mean. + * @param epsilon Convergence criterion for cumulative probabilities. + * @param maxIterations the maximum number of iterations for cumulative probabilities. + * @throws NotStrictlyPositiveException if {@code p <= 0}. + * @since 2.1 + */ + public PoissonDistribution(double p, double epsilon, int maxIterations) + throws NotStrictlyPositiveException { + this(new Well19937c(), p, epsilon, maxIterations); + } + + /** + * Creates a new Poisson distribution with specified mean, convergence criterion and maximum + * number of iterations. + * + * @param rng Random number generator. + * @param p Poisson mean. + * @param epsilon Convergence criterion for cumulative probabilities. 
+ * @param maxIterations the maximum number of iterations for cumulative probabilities. + * @throws NotStrictlyPositiveException if {@code p <= 0}. + * @since 3.1 + */ + public PoissonDistribution(RandomGenerator rng, double p, double epsilon, int maxIterations) + throws NotStrictlyPositiveException { + super(rng); + + if (p <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.MEAN, p); + } + mean = p; + this.epsilon = epsilon; + this.maxIterations = maxIterations; + + // Use the same RNG instance as the parent class. + normal = + new NormalDistribution( + rng, + p, + FastMath.sqrt(p), + NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + exponential = + new ExponentialDistribution( + rng, 1, ExponentialDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a new Poisson distribution with the specified mean and convergence criterion. + * + * @param p Poisson mean. + * @param epsilon Convergence criterion for cumulative probabilities. + * @throws NotStrictlyPositiveException if {@code p <= 0}. + * @since 2.1 + */ + public PoissonDistribution(double p, double epsilon) throws NotStrictlyPositiveException { + this(p, epsilon, DEFAULT_MAX_ITERATIONS); + } + + /** + * Creates a new Poisson distribution with the specified mean and maximum number of iterations. + * + * @param p Poisson mean. + * @param maxIterations Maximum number of iterations for cumulative probabilities. + * @since 2.1 + */ + public PoissonDistribution(double p, int maxIterations) { + this(p, DEFAULT_EPSILON, maxIterations); + } + + /** + * Get the mean for the distribution. + * + * @return the mean for the distribution. + */ + public double getMean() { + return mean; + } + + /** {@inheritDoc} */ + public double probability(int x) { + final double logProbability = logProbability(x); + return logProbability == Double.NEGATIVE_INFINITY ? 
0 : FastMath.exp(logProbability); + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + double ret; + if (x < 0 || x == Integer.MAX_VALUE) { + ret = Double.NEGATIVE_INFINITY; + } else if (x == 0) { + ret = -mean; + } else { + ret = + -SaddlePointExpansion.getStirlingError(x) + - SaddlePointExpansion.getDeviancePart(x, mean) + - 0.5 * FastMath.log(MathUtils.TWO_PI) + - 0.5 * FastMath.log(x); + } + return ret; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + if (x < 0) { + return 0; + } + if (x == Integer.MAX_VALUE) { + return 1; + } + return Gamma.regularizedGammaQ((double) x + 1, mean, epsilon, maxIterations); + } + + /** + * Calculates the Poisson distribution function using a normal approximation. The {@code N(mean, + * sqrt(mean))} distribution is used to approximate the Poisson distribution. The computation + * uses "half-correction" (evaluating the normal distribution function at {@code x + 0.5}). + * + * @param x Upper bound, inclusive. + * @return the distribution function value calculated using a normal approximation. + */ + public double normalApproximateProbability(int x) { + // calculate the probability using half-correction + return normal.cumulativeProbability(x + 0.5); + } + + /** + * {@inheritDoc} + * + * <p>For mean parameter {@code p}, the mean is {@code p}. + */ + public double getNumericalMean() { + return getMean(); + } + + /** + * {@inheritDoc} + * + * <p>For mean parameter {@code p}, the variance is {@code p}. + */ + public double getNumericalVariance() { + return getMean(); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the mean parameter. + * + * @return lower bound of the support (always 0) + */ + public int getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is positive infinity, regardless of the parameter values. 
+ * There is no integer infinity, so this method returns {@code Integer.MAX_VALUE}. + * + * @return upper bound of the support (always {@code Integer.MAX_VALUE} for positive infinity) + */ + public int getSupportUpperBound() { + return Integer.MAX_VALUE; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** + * {@inheritDoc} + * + * <p><strong>Algorithm Description</strong>: + * + * <ul> + * <li>For small means, uses simulation of a Poisson process using Uniform deviates, as + * described <a href="http://mathaa.epfl.ch/cours/PMMI2001/interactive/rng7.htm">here</a>. + * The Poisson process (and hence value returned) is bounded by 1000 * mean. + * <li>For large means, uses the rejection algorithm described in + * <blockquote> + * Devroye, Luc. (1981).<i>The Computer Generation of Poisson Random Variables</i><br> + * <strong>Computing</strong> vol. 26 pp. 197-207.<br> + * </blockquote> + * </ul> + * + * @return a random value. + * @since 2.2 + */ + @Override + public int sample() { + return (int) FastMath.min(nextPoisson(mean), Integer.MAX_VALUE); + } + + /** + * @param meanPoisson Mean of the Poisson distribution. + * @return the next sample. + */ + private long nextPoisson(double meanPoisson) { + final double pivot = 40.0d; + if (meanPoisson < pivot) { + double p = FastMath.exp(-meanPoisson); + long n = 0; + double r = 1.0d; + double rnd = 1.0d; + + while (n < 1000 * meanPoisson) { + rnd = random.nextDouble(); + r *= rnd; + if (r >= p) { + n++; + } else { + return n; + } + } + return n; + } else { + final double lambda = FastMath.floor(meanPoisson); + final double lambdaFractional = meanPoisson - lambda; + final double logLambda = FastMath.log(lambda); + final double logLambdaFactorial = CombinatoricsUtils.factorialLog((int) lambda); + final long y2 = lambdaFractional < Double.MIN_VALUE ? 
0 : nextPoisson(lambdaFractional); + final double delta = + FastMath.sqrt(lambda * FastMath.log(32 * lambda / FastMath.PI + 1)); + final double halfDelta = delta / 2; + final double twolpd = 2 * lambda + delta; + final double a1 = FastMath.sqrt(FastMath.PI * twolpd) * FastMath.exp(1 / (8 * lambda)); + final double a2 = (twolpd / delta) * FastMath.exp(-delta * (1 + delta) / twolpd); + final double aSum = a1 + a2 + 1; + final double p1 = a1 / aSum; + final double p2 = a2 / aSum; + final double c1 = 1 / (8 * lambda); + + double x = 0; + double y = 0; + double v = 0; + int a = 0; + double t = 0; + double qr = 0; + double qa = 0; + for (; ; ) { + final double u = random.nextDouble(); + if (u <= p1) { + final double n = random.nextGaussian(); + x = n * FastMath.sqrt(lambda + halfDelta) - 0.5d; + if (x > delta || x < -lambda) { + continue; + } + y = x < 0 ? FastMath.floor(x) : FastMath.ceil(x); + final double e = exponential.sample(); + v = -e - (n * n / 2) + c1; + } else { + if (u > p1 + p2) { + y = lambda; + break; + } else { + x = delta + (twolpd / delta) * exponential.sample(); + y = FastMath.ceil(x); + v = -exponential.sample() - delta * (x + 1) / twolpd; + } + } + a = x < 0 ? 
/**
 * Base interface for distributions on the reals.
 *
 * @since 3.0
 */
public interface RealDistribution {
    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(X = x)}. In other words, this method represents the probability
     * mass function (PMF) for the distribution.
     *
     * @param x the point at which the PMF is evaluated
     * @return the value of the probability mass function at point {@code x}
     */
    double probability(double x);

    /**
     * Returns the probability density function (PDF) of this distribution evaluated at the
     * specified point {@code x}. In general, the PDF is the derivative of the {@link
     * #cumulativeProbability(double) CDF}. If the derivative does not exist at {@code x}, then an
     * appropriate replacement should be returned, e.g. {@code Double.POSITIVE_INFINITY}, {@code
     * Double.NaN}, or the limit inferior or limit superior of the difference quotient.
     *
     * @param x the point at which the PDF is evaluated
     * @return the value of the probability density function at point {@code x}
     */
    double density(double x);

    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(X <= x)}. In other words, this method represents the
     * (cumulative) distribution function (CDF) for this distribution.
     *
     * @param x the point at which the CDF is evaluated
     * @return the probability that a random variable with this distribution takes a value less than
     *     or equal to {@code x}
     */
    double cumulativeProbability(double x);

    /**
     * For a random variable {@code X} whose values are distributed according to this distribution,
     * this method returns {@code P(x0 < X <= x1)}.
     *
     * @param x0 the exclusive lower bound
     * @param x1 the inclusive upper bound
     * @return the probability that a random variable with this distribution takes a value between
     *     {@code x0} and {@code x1}, excluding the lower and including the upper endpoint
     * @throws NumberIsTooLargeException if {@code x0 > x1}
     * @deprecated As of 3.1. In 4.0, this method will be renamed {@code probability(double x0,
     *     double x1)}.
     */
    @Deprecated
    double cumulativeProbability(double x0, double x1) throws NumberIsTooLargeException;

    /**
     * Computes the quantile function of this distribution. For a random variable {@code X}
     * distributed according to this distribution, the returned value is
     *
     * <ul>
     *   <li>{@code inf{x in R | P(X<=x) >= p}} for {@code 0 < p <= 1},
     *   <li>{@code inf{x in R | P(X<=x) > 0}} for {@code p = 0}.
     * </ul>
     *
     * @param p the cumulative probability
     * @return the smallest {@code p}-quantile of this distribution (largest 0-quantile for {@code p
     *     = 0})
     * @throws OutOfRangeException if {@code p < 0} or {@code p > 1}
     */
    double inverseCumulativeProbability(double p) throws OutOfRangeException;

    /**
     * Use this method to get the numerical value of the mean of this distribution.
     *
     * @return the mean or {@code Double.NaN} if it is not defined
     */
    double getNumericalMean();

    /**
     * Use this method to get the numerical value of the variance of this distribution.
     *
     * @return the variance (possibly {@code Double.POSITIVE_INFINITY} as for certain cases in
     *     {@link TDistribution}) or {@code Double.NaN} if it is not defined
     */
    double getNumericalVariance();

    /**
     * Access the lower bound of the support. This method must return the same value as {@code
     * inverseCumulativeProbability(0)}. In other words, this method must return
     *
     * <p>{@code inf {x in R | P(X <= x) > 0}}.
     *
     * @return lower bound of the support (might be {@code Double.NEGATIVE_INFINITY})
     */
    double getSupportLowerBound();

    /**
     * Access the upper bound of the support. This method must return the same value as {@code
     * inverseCumulativeProbability(1)}. In other words, this method must return
     *
     * <p>{@code inf {x in R | P(X <= x) = 1}}.
     *
     * @return upper bound of the support (might be {@code Double.POSITIVE_INFINITY})
     */
    double getSupportUpperBound();

    /**
     * Whether or not the lower bound of support is in the domain of the density function. Returns
     * true iff {@code getSupportLowerBound()} is finite and {@code density(getSupportLowerBound())}
     * returns a non-NaN, non-infinite value.
     *
     * @return true if the lower bound of support is finite and the density function returns a
     *     non-NaN, non-infinite value there
     * @deprecated to be removed in 4.0
     */
    @Deprecated
    boolean isSupportLowerBoundInclusive();

    /**
     * Whether or not the upper bound of support is in the domain of the density function. Returns
     * true iff {@code getSupportUpperBound()} is finite and {@code density(getSupportUpperBound())}
     * returns a non-NaN, non-infinite value.
     *
     * @return true if the upper bound of support is finite and the density function returns a
     *     non-NaN, non-infinite value there
     * @deprecated to be removed in 4.0
     */
    @Deprecated
    boolean isSupportUpperBoundInclusive();

    /**
     * Use this method to get information about whether the support is connected, i.e. whether all
     * values between the lower and upper bound of the support are included in the support.
     *
     * @return whether the support is connected or not
     */
    boolean isSupportConnected();

    /**
     * Reseed the random generator used to generate samples.
     *
     * @param seed the new seed
     */
    void reseedRandomGenerator(long seed);

    /**
     * Generate a random value sampled from this distribution.
     *
     * @return a random value.
     */
    double sample();

    /**
     * Generate a random sample from the distribution.
     *
     * @param sampleSize the number of random values to generate
     * @return an array representing the random sample
     * @throws org.apache.commons.math3.exception.NotStrictlyPositiveException if {@code sampleSize}
     *     is not positive
     */
    double[] sample(int sampleSize);
}
+ * + * <p>This class is not intended to be called directly. + * + * <p>References: + * + * <ol> + * <li>Catherine Loader (2000). "Fast and Accurate Computation of Binomial Probabilities.". <a + * target="_blank" href="http://www.herine.net/stat/papers/dbinom.pdf"> + * http://www.herine.net/stat/papers/dbinom.pdf</a> + * </ol> + * + * @since 2.1 + */ +final class SaddlePointExpansion { + + /** 1/2 * log(2 π). */ + private static final double HALF_LOG_2_PI = 0.5 * FastMath.log(MathUtils.TWO_PI); + + /** exact Stirling expansion error for certain values. */ + private static final double[] EXACT_STIRLING_ERRORS = { + 0.0, /* 0.0 */ + 0.1534264097200273452913848, /* 0.5 */ + 0.0810614667953272582196702, /* 1.0 */ + 0.0548141210519176538961390, /* 1.5 */ + 0.0413406959554092940938221, /* 2.0 */ + 0.03316287351993628748511048, /* 2.5 */ + 0.02767792568499833914878929, /* 3.0 */ + 0.02374616365629749597132920, /* 3.5 */ + 0.02079067210376509311152277, /* 4.0 */ + 0.01848845053267318523077934, /* 4.5 */ + 0.01664469118982119216319487, /* 5.0 */ + 0.01513497322191737887351255, /* 5.5 */ + 0.01387612882307074799874573, /* 6.0 */ + 0.01281046524292022692424986, /* 6.5 */ + 0.01189670994589177009505572, /* 7.0 */ + 0.01110455975820691732662991, /* 7.5 */ + 0.010411265261972096497478567, /* 8.0 */ + 0.009799416126158803298389475, /* 8.5 */ + 0.009255462182712732917728637, /* 9.0 */ + 0.008768700134139385462952823, /* 9.5 */ + 0.008330563433362871256469318, /* 10.0 */ + 0.007934114564314020547248100, /* 10.5 */ + 0.007573675487951840794972024, /* 11.0 */ + 0.007244554301320383179543912, /* 11.5 */ + 0.006942840107209529865664152, /* 12.0 */ + 0.006665247032707682442354394, /* 12.5 */ + 0.006408994188004207068439631, /* 13.0 */ + 0.006171712263039457647532867, /* 13.5 */ + 0.005951370112758847735624416, /* 14.0 */ + 0.005746216513010115682023589, /* 14.5 */ + 0.005554733551962801371038690 /* 15.0 */ + }; + + /** Default constructor. 
*/ + private SaddlePointExpansion() { + super(); + } + + /** + * Compute the error of Stirling's series at the given value. + * + * <p>References: + * + * <ol> + * <li>Eric W. Weisstein. "Stirling's Series." From MathWorld--A Wolfram Web Resource. <a + * target="_blank" href="http://mathworld.wolfram.com/StirlingsSeries.html"> + * http://mathworld.wolfram.com/StirlingsSeries.html</a> + * </ol> + * + * @param z the value. + * @return the Striling's series error. + */ + static double getStirlingError(double z) { + double ret; + if (z < 15.0) { + double z2 = 2.0 * z; + if (FastMath.floor(z2) == z2) { + ret = EXACT_STIRLING_ERRORS[(int) z2]; + } else { + ret = Gamma.logGamma(z + 1.0) - (z + 0.5) * FastMath.log(z) + z - HALF_LOG_2_PI; + } + } else { + double z2 = z * z; + ret = + (0.083333333333333333333 + - (0.00277777777777777777778 + - (0.00079365079365079365079365 + - (0.000595238095238095238095238 + - 0.0008417508417508417508417508 + / z2) + / z2) + / z2) + / z2) + / z; + } + return ret; + } + + /** + * A part of the deviance portion of the saddle point approximation. + * + * <p>References: + * + * <ol> + * <li>Catherine Loader (2000). "Fast and Accurate Computation of Binomial Probabilities.". <a + * target="_blank" href="http://www.herine.net/stat/papers/dbinom.pdf"> + * http://www.herine.net/stat/papers/dbinom.pdf</a> + * </ol> + * + * @param x the x value. + * @param mu the average. + * @return a part of the deviance. + */ + static double getDeviancePart(double x, double mu) { + double ret; + if (FastMath.abs(x - mu) < 0.1 * (x + mu)) { + double d = x - mu; + double v = d / (x + mu); + double s1 = v * d; + double s = Double.NaN; + double ej = 2.0 * x * v; + v *= v; + int j = 1; + while (s1 != s) { + s = s1; + ej *= v; + s1 = s + ej / ((j * 2) + 1); + ++j; + } + ret = s1; + } else { + ret = x * FastMath.log(x / mu) + mu - x; + } + return ret; + } + + /** + * Compute the logarithm of the PMF for a binomial distribution using the saddle point + * expansion. 
+ * + * @param x the value at which the probability is evaluated. + * @param n the number of trials. + * @param p the probability of success. + * @param q the probability of failure (1 - p). + * @return log(p(x)). + */ + static double logBinomialProbability(int x, int n, double p, double q) { + double ret; + if (x == 0) { + if (p < 0.1) { + ret = -getDeviancePart(n, n * q) - n * p; + } else { + ret = n * FastMath.log(q); + } + } else if (x == n) { + if (q < 0.1) { + ret = -getDeviancePart(n, n * p) - n * q; + } else { + ret = n * FastMath.log(p); + } + } else { + ret = + getStirlingError(n) + - getStirlingError(x) + - getStirlingError(n - x) + - getDeviancePart(x, n * p) + - getDeviancePart(n - x, n * q); + double f = (MathUtils.TWO_PI * x * (n - x)) / n; + ret = -0.5 * FastMath.log(f) + ret; + } + return ret; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/TDistribution.java b/src/main/java/org/apache/commons/math3/distribution/TDistribution.java new file mode 100644 index 0000000..8e6053a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/TDistribution.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Beta; +import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of Student's t-distribution. + * + * @see "<a href='http://en.wikipedia.org/wiki/Student's_t-distribution'>Student's + * t-distribution (Wikipedia)</a>" + * @see "<a href='http://mathworld.wolfram.com/Studentst-Distribution.html'>Student's t-distribution + * (MathWorld)</a>" + */ +public class TDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier */ + private static final long serialVersionUID = -5852615386664158222L; + + /** The degrees of freedom. */ + private final double degreesOfFreedom; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** Static computation factor based on degreesOfFreedom. */ + private final double factor; + + /** + * Create a t distribution using the given degrees of freedom. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param degreesOfFreedom Degrees of freedom. 
+ * @throws NotStrictlyPositiveException if {@code degreesOfFreedom <= 0} + */ + public TDistribution(double degreesOfFreedom) throws NotStrictlyPositiveException { + this(degreesOfFreedom, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a t distribution using the given degrees of freedom and the specified inverse + * cumulative probability absolute accuracy. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param degreesOfFreedom Degrees of freedom. + * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code degreesOfFreedom <= 0} + * @since 2.1 + */ + public TDistribution(double degreesOfFreedom, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + this(new Well19937c(), degreesOfFreedom, inverseCumAccuracy); + } + + /** + * Creates a t distribution. + * + * @param rng Random number generator. + * @param degreesOfFreedom Degrees of freedom. + * @throws NotStrictlyPositiveException if {@code degreesOfFreedom <= 0} + * @since 3.3 + */ + public TDistribution(RandomGenerator rng, double degreesOfFreedom) + throws NotStrictlyPositiveException { + this(rng, degreesOfFreedom, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a t distribution. + * + * @param rng Random number generator. + * @param degreesOfFreedom Degrees of freedom. + * @param inverseCumAccuracy the maximum absolute error in inverse cumulative probability + * estimates (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). 
+ * @throws NotStrictlyPositiveException if {@code degreesOfFreedom <= 0} + * @since 3.1 + */ + public TDistribution(RandomGenerator rng, double degreesOfFreedom, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (degreesOfFreedom <= 0) { + throw new NotStrictlyPositiveException( + LocalizedFormats.DEGREES_OF_FREEDOM, degreesOfFreedom); + } + this.degreesOfFreedom = degreesOfFreedom; + solverAbsoluteAccuracy = inverseCumAccuracy; + + final double n = degreesOfFreedom; + final double nPlus1Over2 = (n + 1) / 2; + factor = + Gamma.logGamma(nPlus1Over2) + - 0.5 * (FastMath.log(FastMath.PI) + FastMath.log(n)) + - Gamma.logGamma(n / 2); + } + + /** + * Access the degrees of freedom. + * + * @return the degrees of freedom. + */ + public double getDegreesOfFreedom() { + return degreesOfFreedom; + } + + /** {@inheritDoc} */ + public double density(double x) { + return FastMath.exp(logDensity(x)); + } + + /** {@inheritDoc} */ + @Override + public double logDensity(double x) { + final double n = degreesOfFreedom; + final double nPlus1Over2 = (n + 1) / 2; + return factor - nPlus1Over2 * FastMath.log(1 + x * x / n); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + double ret; + if (x == 0) { + ret = 0.5; + } else { + double t = + Beta.regularizedBeta( + degreesOfFreedom / (degreesOfFreedom + (x * x)), + 0.5 * degreesOfFreedom, + 0.5); + if (x < 0.0) { + ret = 0.5 * t; + } else { + ret = 1.0 - 0.5 * t; + } + } + + return ret; + } + + /** {@inheritDoc} */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For degrees of freedom parameter {@code df}, the mean is + * + * <ul> + * <li>if {@code df > 1} then {@code 0}, + * <li>else undefined ({@code Double.NaN}). 
+ * </ul> + */ + public double getNumericalMean() { + final double df = getDegreesOfFreedom(); + + if (df > 1) { + return 0; + } + + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>For degrees of freedom parameter {@code df}, the variance is + * + * <ul> + * <li>if {@code df > 2} then {@code df / (df - 2)}, + * <li>if {@code 1 < df <= 2} then positive infinity ({@code Double.POSITIVE_INFINITY}), + * <li>else undefined ({@code Double.NaN}). + * </ul> + */ + public double getNumericalVariance() { + final double df = getDegreesOfFreedom(); + + if (df > 2) { + return df / (df - 2); + } + + if (df > 1 && df <= 2) { + return Double.POSITIVE_INFINITY; + } + + return Double.NaN; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always negative infinity no matter the parameters. + * + * @return lower bound of the support (always {@code Double.NEGATIVE_INFINITY}) + */ + public double getSupportLowerBound() { + return Double.NEGATIVE_INFINITY; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. + * + * @return upper bound of the support (always {@code Double.POSITIVE_INFINITY}) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return false; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. 
+ * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/TriangularDistribution.java b/src/main/java/org/apache/commons/math3/distribution/TriangularDistribution.java new file mode 100644 index 0000000..a7feadc --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/TriangularDistribution.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the triangular real distribution. 
+ * + * @see <a href="http://en.wikipedia.org/wiki/Triangular_distribution">Triangular distribution + * (Wikipedia)</a> + * @since 3.0 + */ +public class TriangularDistribution extends AbstractRealDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = 20120112L; + + /** Lower limit of this distribution (inclusive). */ + private final double a; + + /** Upper limit of this distribution (inclusive). */ + private final double b; + + /** Mode of this distribution. */ + private final double c; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** + * Creates a triangular real distribution using the given lower limit, upper limit, and mode. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param a Lower limit of this distribution (inclusive). + * @param b Upper limit of this distribution (inclusive). + * @param c Mode of this distribution. + * @throws NumberIsTooLargeException if {@code a >= b} or if {@code c > b}. + * @throws NumberIsTooSmallException if {@code c < a}. + */ + public TriangularDistribution(double a, double c, double b) + throws NumberIsTooLargeException, NumberIsTooSmallException { + this(new Well19937c(), a, c, b); + } + + /** + * Creates a triangular distribution. + * + * @param rng Random number generator. + * @param a Lower limit of this distribution (inclusive). + * @param b Upper limit of this distribution (inclusive). + * @param c Mode of this distribution. + * @throws NumberIsTooLargeException if {@code a >= b} or if {@code c > b}. 
+ * @throws NumberIsTooSmallException if {@code c < a}. + * @since 3.1 + */ + public TriangularDistribution(RandomGenerator rng, double a, double c, double b) + throws NumberIsTooLargeException, NumberIsTooSmallException { + super(rng); + + if (a >= b) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, a, b, false); + } + if (c < a) { + throw new NumberIsTooSmallException(LocalizedFormats.NUMBER_TOO_SMALL, c, a, true); + } + if (c > b) { + throw new NumberIsTooLargeException(LocalizedFormats.NUMBER_TOO_LARGE, c, b, true); + } + + this.a = a; + this.c = c; + this.b = b; + solverAbsoluteAccuracy = FastMath.max(FastMath.ulp(a), FastMath.ulp(b)); + } + + /** + * Returns the mode {@code c} of this distribution. + * + * @return the mode {@code c} of this distribution + */ + public double getMode() { + return c; + } + + /** + * {@inheritDoc} + * + * <p>For this distribution, the returned value is not really meaningful, since exact formulas + * are implemented for the computation of the {@link #inverseCumulativeProbability(double)} (no + * solver is invoked). + * + * <p>For lower limit {@code a} and upper limit {@code b}, the current implementation returns + * {@code max(ulp(a), ulp(b)}. + */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>For lower limit {@code a}, upper limit {@code b} and mode {@code c}, the PDF is given by + * + * <ul> + * <li>{@code 2 * (x - a) / [(b - a) * (c - a)]} if {@code a <= x < c}, + * <li>{@code 2 / (b - a)} if {@code x = c}, + * <li>{@code 2 * (b - x) / [(b - a) * (b - c)]} if {@code c < x <= b}, + * <li>{@code 0} otherwise. 
+ * </ul> + */ + public double density(double x) { + if (x < a) { + return 0; + } + if (a <= x && x < c) { + double divident = 2 * (x - a); + double divisor = (b - a) * (c - a); + return divident / divisor; + } + if (x == c) { + return 2 / (b - a); + } + if (c < x && x <= b) { + double divident = 2 * (b - x); + double divisor = (b - a) * (b - c); + return divident / divisor; + } + return 0; + } + + /** + * {@inheritDoc} + * + * <p>For lower limit {@code a}, upper limit {@code b} and mode {@code c}, the CDF is given by + * + * <ul> + * <li>{@code 0} if {@code x < a}, + * <li>{@code (x - a)^2 / [(b - a) * (c - a)]} if {@code a <= x < c}, + * <li>{@code (c - a) / (b - a)} if {@code x = c}, + * <li>{@code 1 - (b - x)^2 / [(b - a) * (b - c)]} if {@code c < x <= b}, + * <li>{@code 1} if {@code x > b}. + * </ul> + */ + public double cumulativeProbability(double x) { + if (x < a) { + return 0; + } + if (a <= x && x < c) { + double divident = (x - a) * (x - a); + double divisor = (b - a) * (c - a); + return divident / divisor; + } + if (x == c) { + return (c - a) / (b - a); + } + if (c < x && x <= b) { + double divident = (b - x) * (b - x); + double divisor = (b - a) * (b - c); + return 1 - (divident / divisor); + } + return 1; + } + + /** + * {@inheritDoc} + * + * <p>For lower limit {@code a}, upper limit {@code b}, and mode {@code c}, the mean is {@code + * (a + b + c) / 3}. + */ + public double getNumericalMean() { + return (a + b + c) / 3; + } + + /** + * {@inheritDoc} + * + * <p>For lower limit {@code a}, upper limit {@code b}, and mode {@code c}, the variance is + * {@code (a^2 + b^2 + c^2 - a * b - a * c - b * c) / 18}. + */ + public double getNumericalVariance() { + return (a * a + b * b + c * c - a * b - a * c - b * c) / 18; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is equal to the lower limit parameter {@code a} of the + * distribution. 
+ * + * @return lower bound of the support + */ + public double getSupportLowerBound() { + return a; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is equal to the upper limit parameter {@code b} of the + * distribution. + * + * @return upper bound of the support + */ + public double getSupportUpperBound() { + return b; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return true; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(double p) throws OutOfRangeException { + if (p < 0 || p > 1) { + throw new OutOfRangeException(p, 0, 1); + } + if (p == 0) { + return a; + } + if (p == 1) { + return b; + } + if (p < (c - a) / (b - a)) { + return a + FastMath.sqrt(p * (b - a) * (c - a)); + } + return b - FastMath.sqrt((1 - p) * (b - a) * (b - c)); + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/UniformIntegerDistribution.java b/src/main/java/org/apache/commons/math3/distribution/UniformIntegerDistribution.java new file mode 100644 index 0000000..8a3a98b --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/UniformIntegerDistribution.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; + +/** + * Implementation of the uniform integer distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Uniform_distribution_(discrete)" >Uniform distribution + * (discrete), at Wikipedia</a> + * @since 3.0 + */ +public class UniformIntegerDistribution extends AbstractIntegerDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = 20120109L; + + /** Lower bound (inclusive) of this distribution. */ + private final int lower; + + /** Upper bound (inclusive) of this distribution. */ + private final int upper; + + /** + * Creates a new uniform integer distribution using the given lower and upper bounds (both + * inclusive). + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param lower Lower bound (inclusive) of this distribution. + * @param upper Upper bound (inclusive) of this distribution. 
+ * @throws NumberIsTooLargeException if {@code lower >= upper}. + */ + public UniformIntegerDistribution(int lower, int upper) throws NumberIsTooLargeException { + this(new Well19937c(), lower, upper); + } + + /** + * Creates a new uniform integer distribution using the given lower and upper bounds (both + * inclusive). + * + * @param rng Random number generator. + * @param lower Lower bound (inclusive) of this distribution. + * @param upper Upper bound (inclusive) of this distribution. + * @throws NumberIsTooLargeException if {@code lower > upper}. + * @since 3.1 + */ + public UniformIntegerDistribution(RandomGenerator rng, int lower, int upper) + throws NumberIsTooLargeException { + super(rng); + + if (lower > upper) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper, true); + } + this.lower = lower; + this.upper = upper; + } + + /** {@inheritDoc} */ + public double probability(int x) { + if (x < lower || x > upper) { + return 0; + } + return 1.0 / (upper - lower + 1); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(int x) { + if (x < lower) { + return 0; + } + if (x > upper) { + return 1; + } + return (x - lower + 1.0) / (upper - lower + 1.0); + } + + /** + * {@inheritDoc} + * + * <p>For lower bound {@code lower} and upper bound {@code upper}, the mean is {@code 0.5 * + * (lower + upper)}. + */ + public double getNumericalMean() { + return 0.5 * (lower + upper); + } + + /** + * {@inheritDoc} + * + * <p>For lower bound {@code lower} and upper bound {@code upper}, and {@code n = upper - lower + * + 1}, the variance is {@code (n^2 - 1) / 12}. + */ + public double getNumericalVariance() { + double n = upper - lower + 1; + return (n * n - 1) / 12.0; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is equal to the lower bound parameter of the distribution. 
+ * + * @return lower bound of the support + */ + public int getSupportLowerBound() { + return lower; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is equal to the upper bound parameter of the distribution. + * + * @return upper bound of the support + */ + public int getSupportUpperBound() { + return upper; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public int sample() { + final int max = (upper - lower) + 1; + if (max <= 0) { + // The range is too wide to fit in a positive int (larger + // than 2^31); as it covers more than half the integer range, + // we use a simple rejection method. + while (true) { + final int r = random.nextInt(); + if (r >= lower && r <= upper) { + return r; + } + } + } else { + // We can shift the range and directly generate a positive int. + return lower + random.nextInt(max); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/UniformRealDistribution.java b/src/main/java/org/apache/commons/math3/distribution/UniformRealDistribution.java new file mode 100644 index 0000000..a3ccd97 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/UniformRealDistribution.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; + +/** + * Implementation of the uniform real distribution. + * + * @see <a href="http://en.wikipedia.org/wiki/Uniform_distribution_(continuous)" >Uniform + * distribution (continuous), at Wikipedia</a> + * @since 3.0 + */ +public class UniformRealDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @deprecated as of 3.2 not used anymore, will be removed in 4.0 + */ + @Deprecated public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 20120109L; + + /** Lower bound of this distribution (inclusive). */ + private final double lower; + + /** Upper bound of this distribution (exclusive). */ + private final double upper; + + /** + * Create a standard uniform real distribution with lower bound (inclusive) equal to zero and + * upper bound (exclusive) equal to one. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). 
In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + */ + public UniformRealDistribution() { + this(0, 1); + } + + /** + * Create a uniform real distribution using the given lower and upper bounds. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param lower Lower bound of this distribution (inclusive). + * @param upper Upper bound of this distribution (exclusive). + * @throws NumberIsTooLargeException if {@code lower >= upper}. + */ + public UniformRealDistribution(double lower, double upper) throws NumberIsTooLargeException { + this(new Well19937c(), lower, upper); + } + + /** + * Create a uniform distribution. + * + * @param lower Lower bound of this distribution (inclusive). + * @param upper Upper bound of this distribution (exclusive). + * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NumberIsTooLargeException if {@code lower >= upper}. + * @deprecated as of 3.2, inverse CDF is now calculated analytically, use {@link + * #UniformRealDistribution(double, double)} instead. + */ + @Deprecated + public UniformRealDistribution(double lower, double upper, double inverseCumAccuracy) + throws NumberIsTooLargeException { + this(new Well19937c(), lower, upper); + } + + /** + * Creates a uniform distribution. + * + * @param rng Random number generator. + * @param lower Lower bound of this distribution (inclusive). + * @param upper Upper bound of this distribution (exclusive). 
+ * @param inverseCumAccuracy Inverse cumulative probability accuracy. + * @throws NumberIsTooLargeException if {@code lower >= upper}. + * @since 3.1 + * @deprecated as of 3.2, inverse CDF is now calculated analytically, use {@link + * #UniformRealDistribution(RandomGenerator, double, double)} instead. + */ + @Deprecated + public UniformRealDistribution( + RandomGenerator rng, double lower, double upper, double inverseCumAccuracy) { + this(rng, lower, upper); + } + + /** + * Creates a uniform distribution. + * + * @param rng Random number generator. + * @param lower Lower bound of this distribution (inclusive). + * @param upper Upper bound of this distribution (exclusive). + * @throws NumberIsTooLargeException if {@code lower >= upper}. + * @since 3.1 + */ + public UniformRealDistribution(RandomGenerator rng, double lower, double upper) + throws NumberIsTooLargeException { + super(rng); + if (lower >= upper) { + throw new NumberIsTooLargeException( + LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper, false); + } + + this.lower = lower; + this.upper = upper; + } + + /** {@inheritDoc} */ + public double density(double x) { + if (x < lower || x > upper) { + return 0.0; + } + return 1 / (upper - lower); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + if (x <= lower) { + return 0; + } + if (x >= upper) { + return 1; + } + return (x - lower) / (upper - lower); + } + + /** {@inheritDoc} */ + @Override + public double inverseCumulativeProbability(final double p) throws OutOfRangeException { + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0, 1); + } + return p * (upper - lower) + lower; + } + + /** + * {@inheritDoc} + * + * <p>For lower bound {@code lower} and upper bound {@code upper}, the mean is {@code 0.5 * + * (lower + upper)}. 
+ */ + public double getNumericalMean() { + return 0.5 * (lower + upper); + } + + /** + * {@inheritDoc} + * + * <p>For lower bound {@code lower} and upper bound {@code upper}, the variance is {@code (upper + * - lower)^2 / 12}. + */ + public double getNumericalVariance() { + double ul = upper - lower; + return ul * ul / 12; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is equal to the lower bound parameter of the distribution. + * + * @return lower bound of the support + */ + public double getSupportLowerBound() { + return lower; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is equal to the upper bound parameter of the distribution. + * + * @return upper bound of the support + */ + public double getSupportUpperBound() { + return upper; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return true; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public double sample() { + final double u = random.nextDouble(); + return u * upper + (1 - u) * lower; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/WeibullDistribution.java b/src/main/java/org/apache/commons/math3/distribution/WeibullDistribution.java new file mode 100644 index 0000000..b7d2953 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/WeibullDistribution.java @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Weibull distribution. This implementation uses the two parameter form of + * the distribution defined by <a href="http://mathworld.wolfram.com/WeibullDistribution.html"> + * Weibull Distribution</a>, equations (1) and (2). + * + * @see <a href="http://en.wikipedia.org/wiki/Weibull_distribution">Weibull distribution + * (Wikipedia)</a> + * @see <a href="http://mathworld.wolfram.com/WeibullDistribution.html">Weibull distribution + * (MathWorld)</a> + * @since 1.1 (changed to concrete class in 3.0) + */ +public class WeibullDistribution extends AbstractRealDistribution { + /** + * Default inverse cumulative probability accuracy. + * + * @since 2.1 + */ + public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9; + + /** Serializable version identifier. */ + private static final long serialVersionUID = 8589540077390120676L; + + /** The shape parameter. 
*/ + private final double shape; + + /** The scale parameter. */ + private final double scale; + + /** Inverse cumulative probability accuracy. */ + private final double solverAbsoluteAccuracy; + + /** Cached numerical mean */ + private double numericalMean = Double.NaN; + + /** Whether or not the numerical mean has been calculated */ + private boolean numericalMeanIsCalculated = false; + + /** Cached numerical variance */ + private double numericalVariance = Double.NaN; + + /** Whether or not the numerical variance has been calculated */ + private boolean numericalVarianceIsCalculated = false; + + /** + * Create a Weibull distribution with the given shape and scale and a location equal to zero. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param alpha Shape parameter. + * @param beta Scale parameter. + * @throws NotStrictlyPositiveException if {@code alpha <= 0} or {@code beta <= 0}. + */ + public WeibullDistribution(double alpha, double beta) throws NotStrictlyPositiveException { + this(alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Create a Weibull distribution with the given shape, scale and inverse cumulative probability + * accuracy and a location equal to zero. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. 
+ * + * @param alpha Shape parameter. + * @param beta Scale parameter. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code alpha <= 0} or {@code beta <= 0}. + * @since 2.1 + */ + public WeibullDistribution(double alpha, double beta, double inverseCumAccuracy) { + this(new Well19937c(), alpha, beta, inverseCumAccuracy); + } + + /** + * Creates a Weibull distribution. + * + * @param rng Random number generator. + * @param alpha Shape parameter. + * @param beta Scale parameter. + * @throws NotStrictlyPositiveException if {@code alpha <= 0} or {@code beta <= 0}. + * @since 3.3 + */ + public WeibullDistribution(RandomGenerator rng, double alpha, double beta) + throws NotStrictlyPositiveException { + this(rng, alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY); + } + + /** + * Creates a Weibull distribution. + * + * @param rng Random number generator. + * @param alpha Shape parameter. + * @param beta Scale parameter. + * @param inverseCumAccuracy Maximum absolute error in inverse cumulative probability estimates + * (defaults to {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}). + * @throws NotStrictlyPositiveException if {@code alpha <= 0} or {@code beta <= 0}. + * @since 3.1 + */ + public WeibullDistribution( + RandomGenerator rng, double alpha, double beta, double inverseCumAccuracy) + throws NotStrictlyPositiveException { + super(rng); + + if (alpha <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SHAPE, alpha); + } + if (beta <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SCALE, beta); + } + scale = beta; + shape = alpha; + solverAbsoluteAccuracy = inverseCumAccuracy; + } + + /** + * Access the shape parameter, {@code alpha}. + * + * @return the shape parameter, {@code alpha}. + */ + public double getShape() { + return shape; + } + + /** + * Access the scale parameter, {@code beta}. 
+ * + * @return the scale parameter, {@code beta}. + */ + public double getScale() { + return scale; + } + + /** {@inheritDoc} */ + public double density(double x) { + if (x < 0) { + return 0; + } + + final double xscale = x / scale; + final double xscalepow = FastMath.pow(xscale, shape - 1); + + /* + * FastMath.pow(x / scale, shape) = + * FastMath.pow(xscale, shape) = + * FastMath.pow(xscale, shape - 1) * xscale + */ + final double xscalepowshape = xscalepow * xscale; + + return (shape / scale) * xscalepow * FastMath.exp(-xscalepowshape); + } + + /** {@inheritDoc} */ + @Override + public double logDensity(double x) { + if (x < 0) { + return Double.NEGATIVE_INFINITY; + } + + final double xscale = x / scale; + final double logxscalepow = FastMath.log(xscale) * (shape - 1); + + /* + * FastMath.pow(x / scale, shape) = + * FastMath.pow(xscale, shape) = + * FastMath.pow(xscale, shape - 1) * xscale + */ + final double xscalepowshape = FastMath.exp(logxscalepow) * xscale; + + return FastMath.log(shape / scale) + logxscalepow - xscalepowshape; + } + + /** {@inheritDoc} */ + public double cumulativeProbability(double x) { + double ret; + if (x <= 0.0) { + ret = 0.0; + } else { + ret = 1.0 - FastMath.exp(-FastMath.pow(x / scale, shape)); + } + return ret; + } + + /** + * {@inheritDoc} + * + * <p>Returns {@code 0} when {@code p == 0} and {@code Double.POSITIVE_INFINITY} when {@code p + * == 1}. + */ + @Override + public double inverseCumulativeProbability(double p) { + double ret; + if (p < 0.0 || p > 1.0) { + throw new OutOfRangeException(p, 0.0, 1.0); + } else if (p == 0) { + ret = 0.0; + } else if (p == 1) { + ret = Double.POSITIVE_INFINITY; + } else { + ret = scale * FastMath.pow(-FastMath.log1p(-p), 1.0 / shape); + } + return ret; + } + + /** + * Return the absolute accuracy setting of the solver used to estimate inverse cumulative + * probabilities. + * + * @return the solver absolute accuracy. 
+ * @since 2.1 + */ + @Override + protected double getSolverAbsoluteAccuracy() { + return solverAbsoluteAccuracy; + } + + /** + * {@inheritDoc} + * + * <p>The mean is {@code scale * Gamma(1 + (1 / shape))}, where {@code Gamma()} is the + * Gamma-function. + */ + public double getNumericalMean() { + if (!numericalMeanIsCalculated) { + numericalMean = calculateNumericalMean(); + numericalMeanIsCalculated = true; + } + return numericalMean; + } + + /** + * used by {@link #getNumericalMean()} + * + * @return the mean of this distribution + */ + protected double calculateNumericalMean() { + final double sh = getShape(); + final double sc = getScale(); + + return sc * FastMath.exp(Gamma.logGamma(1 + (1 / sh))); + } + + /** + * {@inheritDoc} + * + * <p>The variance is {@code scale^2 * Gamma(1 + (2 / shape)) - mean^2} where {@code Gamma()} is + * the Gamma-function. + */ + public double getNumericalVariance() { + if (!numericalVarianceIsCalculated) { + numericalVariance = calculateNumericalVariance(); + numericalVarianceIsCalculated = true; + } + return numericalVariance; + } + + /** + * used by {@link #getNumericalVariance()} + * + * @return the variance of this distribution + */ + protected double calculateNumericalVariance() { + final double sh = getShape(); + final double sc = getScale(); + final double mn = getNumericalMean(); + + return (sc * sc) * FastMath.exp(Gamma.logGamma(1 + (2 / sh))) - (mn * mn); + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 0 no matter the parameters. + * + * @return lower bound of the support (always 0) + */ + public double getSupportLowerBound() { + return 0; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is always positive infinity no matter the parameters. 
+ * + * @return upper bound of the support (always {@code Double.POSITIVE_INFINITY}) + */ + public double getSupportUpperBound() { + return Double.POSITIVE_INFINITY; + } + + /** {@inheritDoc} */ + public boolean isSupportLowerBoundInclusive() { + return true; + } + + /** {@inheritDoc} */ + public boolean isSupportUpperBoundInclusive() { + return false; + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java b/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java new file mode 100644 index 0000000..d452122 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/ZipfDistribution.java @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.distribution; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.random.Well19937c; +import org.apache.commons.math3.util.FastMath; + +/** + * Implementation of the Zipf distribution. + * + * <p><strong>Parameters:</strong> For a random variable {@code X} whose values are distributed + * according to this distribution, the probability mass function is given by + * + * <pre> + * P(X = k) = H(N,s) * 1 / k^s for {@code k = 1,2,...,N}. + * </pre> + * + * {@code H(N,s)} is the normalizing constant which corresponds to the generalized harmonic number + * of order N of s. + * + * <p> + * + * <ul> + * <li>{@code N} is the number of elements + * <li>{@code s} is the exponent + * </ul> + * + * @see <a href="https://en.wikipedia.org/wiki/Zipf's_law">Zipf's law (Wikipedia)</a> + * @see <a + * href="https://en.wikipedia.org/wiki/Harmonic_number#Generalized_harmonic_numbers">Generalized + * harmonic numbers</a> + */ +public class ZipfDistribution extends AbstractIntegerDistribution { + /** Serializable version identifier. */ + private static final long serialVersionUID = -140627372283420404L; + + /** Number of elements. */ + private final int numberOfElements; + + /** Exponent parameter of the distribution. 
*/ + private final double exponent; + + /** Cached numerical mean */ + private double numericalMean = Double.NaN; + + /** Whether or not the numerical mean has been calculated */ + private boolean numericalMeanIsCalculated = false; + + /** Cached numerical variance */ + private double numericalVariance = Double.NaN; + + /** Whether or not the numerical variance has been calculated */ + private boolean numericalVarianceIsCalculated = false; + + /** The sampler to be used for the sample() method */ + private transient ZipfRejectionInversionSampler sampler; + + /** + * Create a new Zipf distribution with the given number of elements and exponent. + * + * <p><b>Note:</b> this constructor will implicitly create an instance of {@link Well19937c} as + * random generator to be used for sampling only (see {@link #sample()} and {@link + * #sample(int)}). In case no sampling is needed for the created distribution, it is advised to + * pass {@code null} as random generator via the appropriate constructors to avoid the + * additional initialisation overhead. + * + * @param numberOfElements Number of elements. + * @param exponent Exponent. + * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} or {@code exponent + * <= 0}. + */ + public ZipfDistribution(final int numberOfElements, final double exponent) { + this(new Well19937c(), numberOfElements, exponent); + } + + /** + * Creates a Zipf distribution. + * + * @param rng Random number generator. + * @param numberOfElements Number of elements. + * @param exponent Exponent. + * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} or {@code exponent + * <= 0}. 
+ * @since 3.1 + */ + public ZipfDistribution(RandomGenerator rng, int numberOfElements, double exponent) + throws NotStrictlyPositiveException { + super(rng); + + if (numberOfElements <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION, numberOfElements); + } + if (exponent <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT, exponent); + } + + this.numberOfElements = numberOfElements; + this.exponent = exponent; + } + + /** + * Get the number of elements (e.g. corpus size) for the distribution. + * + * @return the number of elements + */ + public int getNumberOfElements() { + return numberOfElements; + } + + /** + * Get the exponent characterizing the distribution. + * + * @return the exponent + */ + public double getExponent() { + return exponent; + } + + /** {@inheritDoc} */ + public double probability(final int x) { + if (x <= 0 || x > numberOfElements) { + return 0.0; + } + + return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); + } + + /** {@inheritDoc} */ + @Override + public double logProbability(int x) { + if (x <= 0 || x > numberOfElements) { + return Double.NEGATIVE_INFINITY; + } + + return -FastMath.log(x) * exponent + - FastMath.log(generalizedHarmonic(numberOfElements, exponent)); + } + + /** {@inheritDoc} */ + public double cumulativeProbability(final int x) { + if (x <= 0) { + return 0.0; + } else if (x >= numberOfElements) { + return 1.0; + } + + return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); + } + + /** + * {@inheritDoc} + * + * <p>For number of elements {@code N} and exponent {@code s}, the mean is {@code Hs1 / Hs}, + * where + * + * <ul> + * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)}, + * <li>{@code Hs = generalizedHarmonic(N, s)}. 
+ * </ul> + */ + public double getNumericalMean() { + if (!numericalMeanIsCalculated) { + numericalMean = calculateNumericalMean(); + numericalMeanIsCalculated = true; + } + return numericalMean; + } + + /** + * Used by {@link #getNumericalMean()}. + * + * @return the mean of this distribution + */ + protected double calculateNumericalMean() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return Hs1 / Hs; + } + + /** + * {@inheritDoc} + * + * <p>For number of elements {@code N} and exponent {@code s}, the mean is {@code (Hs2 / Hs) - + * (Hs1^2 / Hs^2)}, where + * + * <ul> + * <li>{@code Hs2 = generalizedHarmonic(N, s - 2)}, + * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)}, + * <li>{@code Hs = generalizedHarmonic(N, s)}. + * </ul> + */ + public double getNumericalVariance() { + if (!numericalVarianceIsCalculated) { + numericalVariance = calculateNumericalVariance(); + numericalVarianceIsCalculated = true; + } + return numericalVariance; + } + + /** + * Used by {@link #getNumericalVariance()}. + * + * @return the variance of this distribution + */ + protected double calculateNumericalVariance() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs2 = generalizedHarmonic(N, s - 2); + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); + } + + /** + * Calculates the Nth generalized harmonic number. See <a + * href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic Series</a>. + * + * @param n Term in the series to calculate (must be larger than 1) + * @param m Exponent (special case {@code m = 1} is the harmonic series). + * @return the n<sup>th</sup> generalized harmonic number. 
+ */ + private double generalizedHarmonic(final int n, final double m) { + double value = 0; + for (int k = n; k > 0; --k) { + value += 1.0 / FastMath.pow(k, m); + } + return value; + } + + /** + * {@inheritDoc} + * + * <p>The lower bound of the support is always 1 no matter the parameters. + * + * @return lower bound of the support (always 1) + */ + public int getSupportLowerBound() { + return 1; + } + + /** + * {@inheritDoc} + * + * <p>The upper bound of the support is the number of elements. + * + * @return upper bound of the support + */ + public int getSupportUpperBound() { + return getNumberOfElements(); + } + + /** + * {@inheritDoc} + * + * <p>The support of this distribution is connected. + * + * @return {@code true} + */ + public boolean isSupportConnected() { + return true; + } + + /** {@inheritDoc} */ + @Override + public int sample() { + if (sampler == null) { + sampler = new ZipfRejectionInversionSampler(numberOfElements, exponent); + } + return sampler.sample(random); + } + + /** + * Utility class implementing a rejection inversion sampling method for a discrete, bounded Zipf + * distribution that is based on the method described in + * + * <p>Wolfgang Hörmann and Gerhard Derflinger "Rejection-inversion to generate variates from + * monotone discrete distributions." ACM Transactions on Modeling and Computer Simulation + * (TOMACS) 6.3 (1996): 169-184. + * + * <p>The paper describes an algorithm for exponents larger than 1 (Algorithm ZRI). The original + * method uses {@code H(x) := (v + x)^(1 - q) / (1 - q)} as the integral of the hat function. + * This function is undefined for q = 1, which is the reason for the limitation of the exponent. + * If instead the integral function {@code H(x) := ((v + x)^(1 - q) - 1) / (1 - q)} is used, for + * which a meaningful limit exists for q = 1, the method works for all positive exponents. + * + * <p>The following implementation uses v := 0 and generates integral numbers in the range [1, + * numberOfElements]. 
This is different to the original method where v is defined to be positive + * and numbers are taken from [0, i_max]. This explains why the implementation looks slightly + * different. + * + * @since 3.6 + */ + static final class ZipfRejectionInversionSampler { + + /** Exponent parameter of the distribution. */ + private final double exponent; + + /** Number of elements. */ + private final int numberOfElements; + + /** Constant equal to {@code hIntegral(1.5) - 1}. */ + private final double hIntegralX1; + + /** Constant equal to {@code hIntegral(numberOfElements + 0.5)}. */ + private final double hIntegralNumberOfElements; + + /** Constant equal to {@code 2 - hIntegralInverse(hIntegral(2.5) - h(2)}. */ + private final double s; + + /** + * Simple constructor. + * + * @param numberOfElements number of elements + * @param exponent exponent parameter of the distribution + */ + ZipfRejectionInversionSampler(final int numberOfElements, final double exponent) { + this.exponent = exponent; + this.numberOfElements = numberOfElements; + this.hIntegralX1 = hIntegral(1.5) - 1d; + this.hIntegralNumberOfElements = hIntegral(numberOfElements + 0.5); + this.s = 2d - hIntegralInverse(hIntegral(2.5) - h(2)); + } + + /** + * Generate one integral number in the range [1, numberOfElements]. 
+ * + * @param random random generator to use + * @return generated integral number in the range [1, numberOfElements] + */ + int sample(final RandomGenerator random) { + while (true) { + + final double u = + hIntegralNumberOfElements + + random.nextDouble() * (hIntegralX1 - hIntegralNumberOfElements); + // u is uniformly distributed in (hIntegralX1, hIntegralNumberOfElements] + + double x = hIntegralInverse(u); + + int k = (int) (x + 0.5); + + // Limit k to the range [1, numberOfElements] + // (k could be outside due to numerical inaccuracies) + if (k < 1) { + k = 1; + } else if (k > numberOfElements) { + k = numberOfElements; + } + + // Here, the distribution of k is given by: + // + // P(k = 1) = C * (hIntegral(1.5) - hIntegralX1) = C + // P(k = m) = C * (hIntegral(m + 1/2) - hIntegral(m - 1/2)) for m >= 2 + // + // where C := 1 / (hIntegralNumberOfElements - hIntegralX1) + + if (k - x <= s || u >= hIntegral(k + 0.5) - h(k)) { + + // Case k = 1: + // + // The right inequality is always true, because replacing k by 1 gives + // u >= hIntegral(1.5) - h(1) = hIntegralX1 and u is taken from + // (hIntegralX1, hIntegralNumberOfElements]. + // + // Therefore, the acceptance rate for k = 1 is P(accepted | k = 1) = 1 + // and the probability that 1 is returned as random value is + // P(k = 1 and accepted) = P(accepted | k = 1) * P(k = 1) = C = C / 1^exponent + // + // Case k >= 2: + // + // The left inequality (k - x <= s) is just a short cut + // to avoid the more expensive evaluation of the right inequality + // (u >= hIntegral(k + 0.5) - h(k)) in many cases. + // + // If the left inequality is true, the right inequality is also true: + // Theorem 2 in the paper is valid for all positive exponents, because + // the requirements h'(x) = -exponent/x^(exponent + 1) < 0 and + // (-1/hInverse'(x))'' = (1+1/exponent) * x^(1/exponent-1) >= 0 + // are both fulfilled. + // Therefore, f(x) := x - hIntegralInverse(hIntegral(x + 0.5) - h(x)) + // is a non-decreasing function. 
If k - x <= s holds, + // k - x <= s + f(k) - f(2) is obviously also true which is equivalent to + // -x <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), + // -hIntegralInverse(u) <= -hIntegralInverse(hIntegral(k + 0.5) - h(k)), + // and finally u >= hIntegral(k + 0.5) - h(k). + // + // Hence, the right inequality determines the acceptance rate: + // P(accepted | k = m) = h(m) / (hIntegrated(m+1/2) - hIntegrated(m-1/2)) + // The probability that m is returned is given by + // P(k = m and accepted) = P(accepted | k = m) * P(k = m) = C * h(m) = C / + // m^exponent. + // + // In both cases the probabilities are proportional to the probability mass + // function + // of the Zipf distribution. + + return k; + } + } + } + + /** + * {@code H(x) :=} + * + * <ul> + * <li>{@code (x^(1-exponent) - 1)/(1 - exponent)}, if {@code exponent != 1} + * <li>{@code log(x)}, if {@code exponent == 1} + * </ul> + * + * H(x) is an integral function of h(x), the derivative of H(x) is h(x). + * + * @param x free parameter + * @return {@code H(x)} + */ + private double hIntegral(final double x) { + final double logX = FastMath.log(x); + return helper2((1d - exponent) * logX) * logX; + } + + /** + * {@code h(x) := 1/x^exponent} + * + * @param x free parameter + * @return h(x) + */ + private double h(final double x) { + return FastMath.exp(-exponent * FastMath.log(x)); + } + + /** + * The inverse function of H(x). + * + * @param x free parameter + * @return y for which {@code H(y) = x} + */ + private double hIntegralInverse(final double x) { + double t = x * (1d - exponent); + if (t < -1d) { + // Limit value to the range [-1, +inf). + // t could be smaller than -1 in some rare cases due to numerical errors. + t = -1; + } + return FastMath.exp(helper1(t) * x); + } + + /** + * Helper function that calculates {@code log(1+x)/x}. + * + * <p>A Taylor series expansion is used, if x is close to 0. 
+ * + * @param x a value larger than or equal to -1 + * @return {@code log(1+x)/x} + */ + static double helper1(final double x) { + if (FastMath.abs(x) > 1e-8) { + return FastMath.log1p(x) / x; + } else { + return 1. - x * ((1. / 2.) - x * ((1. / 3.) - x * (1. / 4.))); + } + } + + /** + * Helper function to calculate {@code (exp(x)-1)/x}. + * + * <p>A Taylor series expansion is used, if x is close to 0. + * + * @param x free parameter + * @return {@code (exp(x)-1)/x} if x is non-zero, or 1 if x=0 + */ + static double helper2(final double x) { + if (FastMath.abs(x) > 1e-8) { + return FastMath.expm1(x) / x; + } else { + return 1. + x * (1. / 2.) * (1. + x * (1. / 3.) * (1. + x * (1. / 4.))); + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/distribution/fitting/MultivariateNormalMixtureExpectationMaximization.java b/src/main/java/org/apache/commons/math3/distribution/fitting/MultivariateNormalMixtureExpectationMaximization.java new file mode 100644 index 0000000..0b4ac0d --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/fitting/MultivariateNormalMixtureExpectationMaximization.java @@ -0,0 +1,454 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.distribution.fitting;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.distribution.MultivariateNormalDistribution;
import org.apache.commons.math3.distribution.MixtureMultivariateNormalDistribution;
import org.apache.commons.math3.exception.ConvergenceException;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.NumberIsTooSmallException;
import org.apache.commons.math3.exception.NumberIsTooLargeException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.SingularMatrixException;
import org.apache.commons.math3.stat.correlation.Covariance;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;
import org.apache.commons.math3.util.Pair;

/**
 * Expectation-Maximization algorithm for fitting the parameters of
 * multivariate normal mixture model distributions.
 *
 * This implementation is pure original code based on <a
 * href="https://www.ee.washington.edu/techsite/papers/documents/UWEETR-2010-0002.pdf">
 * EM Demystified: An Expectation-Maximization Tutorial</a> by Yihua Chen and Maya R. Gupta,
 * Department of Electrical Engineering, University of Washington, Seattle, WA 98195.
 * It was verified using external tools like <a
 * href="http://cran.r-project.org/web/packages/mixtools/index.html">CRAN Mixtools</a>
 * (see the JUnit test cases) but it is <strong>not</strong> based on Mixtools code at all.
 * The discussion of the origin of this class can be seen in the comments of the <a
 * href="https://issues.apache.org/jira/browse/MATH-817">MATH-817</a> JIRA issue.
 * @since 3.2
 */
public class MultivariateNormalMixtureExpectationMaximization {
    /**
     * Default maximum number of iterations allowed per fitting process.
     */
    private static final int DEFAULT_MAX_ITERATIONS = 1000;
    /**
     * Default convergence threshold for fitting.
     */
    private static final double DEFAULT_THRESHOLD = 1E-5;
    /**
     * The data to fit (a private row-by-row copy of the constructor argument).
     */
    private final double[][] data;
    /**
     * The model fit against the data; {@code null} until a fit has been started.
     */
    private MixtureMultivariateNormalDistribution fittedModel;
    /**
     * The log likelihood of the data given the fitted model.
     */
    private double logLikelihood = 0d;

    /**
     * Creates an object to fit a multivariate normal mixture model to data.
     *
     * @param data Data to use in fitting procedure
     * @throws NotStrictlyPositiveException if data has no rows
     * @throws DimensionMismatchException if rows of data have different numbers
     *             of columns
     * @throws NumberIsTooSmallException if the number of columns in the data is
     *             less than 2
     */
    public MultivariateNormalMixtureExpectationMaximization(double[][] data)
        throws NotStrictlyPositiveException,
               DimensionMismatchException,
               NumberIsTooSmallException {
        if (data.length < 1) {
            throw new NotStrictlyPositiveException(data.length);
        }

        // Only the outer array is allocated here: every row slot is filled with a
        // defensive copy below, so pre-allocating the rows would be wasted work.
        this.data = new double[data.length][];

        final int numCols = data[0].length;
        for (int i = 0; i < data.length; i++) {
            if (data[i].length != numCols) {
                // Jagged arrays not allowed
                throw new DimensionMismatchException(data[i].length, numCols);
            }
            if (data[i].length < 2) {
                throw new NumberIsTooSmallException(LocalizedFormats.NUMBER_TOO_SMALL,
                                                    data[i].length, 2, true);
            }
            this.data[i] = MathArrays.copyOf(data[i], data[i].length);
        }
    }

    /**
     * Fit a mixture model to the data supplied to the constructor.
     *
     * The quality of the fit depends on the concavity of the data provided to
     * the constructor and the initial mixture provided to this function. If the
     * data has many local optima, multiple runs of the fitting function with
     * different initial mixtures may be required to find the optimal solution.
     * If a SingularMatrixException is encountered, it is possible that another
     * initialization would work.
     *
     * @param initialMixture Model containing initial values of weights and
     *            multivariate normals
     * @param maxIterations Maximum iterations allowed for fit
     * @param threshold Convergence threshold computed as difference in
     *            logLikelihoods between successive iterations
     * @throws SingularMatrixException if any component's covariance matrix is
     *             singular during fitting
     * @throws NotStrictlyPositiveException if maxIterations is less than one
     *             or threshold is less than Double.MIN_VALUE
     * @throws DimensionMismatchException if initialMixture mean vector and data
     *             number of columns are not equal
     * @throws ConvergenceException if the change in log likelihood is still
     *             larger than threshold when the iterations are exhausted
     */
    public void fit(final MixtureMultivariateNormalDistribution initialMixture,
                    final int maxIterations,
                    final double threshold)
            throws SingularMatrixException,
                   NotStrictlyPositiveException,
                   DimensionMismatchException {
        if (maxIterations < 1) {
            throw new NotStrictlyPositiveException(maxIterations);
        }

        if (threshold < Double.MIN_VALUE) {
            throw new NotStrictlyPositiveException(threshold);
        }

        final int n = data.length;

        // Number of data columns. Jagged data already rejected in constructor,
        // so we can assume the lengths of each row are equal.
        final int numCols = data[0].length;
        final int k = initialMixture.getComponents().size();

        final int numMeanColumns
            = initialMixture.getComponents().get(0).getSecond().getMeans().length;

        if (numMeanColumns != numCols) {
            throw new DimensionMismatchException(numMeanColumns, numCols);
        }

        int numIterations = 0;
        double previousLogLikelihood = 0d;

        // Starting from -infinity makes the first convergence test fail, so at
        // least one iteration is always performed.
        logLikelihood = Double.NEGATIVE_INFINITY;

        // Initialize model to fit to initial mixture.
        fittedModel = new MixtureMultivariateNormalDistribution(initialMixture.getComponents());

        // Strict comparison: with the post-increment, "<=" would allow
        // maxIterations + 1 passes through the loop.
        while (numIterations++ < maxIterations &&
               FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
            previousLogLikelihood = logLikelihood;
            double sumLogLikelihood = 0d;

            // Mixture components
            final List<Pair<Double, MultivariateNormalDistribution>> components
                = fittedModel.getComponents();

            // Weight and distribution of each component
            final double[] weights = new double[k];

            final MultivariateNormalDistribution[] mvns = new MultivariateNormalDistribution[k];

            for (int j = 0; j < k; j++) {
                weights[j] = components.get(j).getFirst();
                mvns[j] = components.get(j).getSecond();
            }

            // E-step: compute the data dependent parameters of the expectation
            // function.
            // The percentage of row's total density between a row and a
            // component
            final double[][] gamma = new double[n][k];

            // Sum of gamma for each component
            final double[] gammaSums = new double[k];

            // Sum of gamma times its row for each component
            final double[][] gammaDataProdSums = new double[k][numCols];

            for (int i = 0; i < n; i++) {
                final double rowDensity = fittedModel.density(data[i]);
                sumLogLikelihood += FastMath.log(rowDensity);

                for (int j = 0; j < k; j++) {
                    gamma[i][j] = weights[j] * mvns[j].density(data[i]) / rowDensity;
                    gammaSums[j] += gamma[i][j];

                    for (int col = 0; col < numCols; col++) {
                        gammaDataProdSums[j][col] += gamma[i][j] * data[i][col];
                    }
                }
            }

            // The stored value is the per-row average of the log densities.
            logLikelihood = sumLogLikelihood / n;

            // M-step: compute the new parameters based on the expectation
            // function.
            final double[] newWeights = new double[k];
            final double[][] newMeans = new double[k][numCols];

            for (int j = 0; j < k; j++) {
                newWeights[j] = gammaSums[j] / n;
                for (int col = 0; col < numCols; col++) {
                    newMeans[j][col] = gammaDataProdSums[j][col] / gammaSums[j];
                }
            }

            // Compute new covariance matrices
            final RealMatrix[] newCovMats = new RealMatrix[k];
            for (int j = 0; j < k; j++) {
                newCovMats[j] = new Array2DRowRealMatrix(numCols, numCols);
            }
            for (int i = 0; i < n; i++) {
                for (int j = 0; j < k; j++) {
                    final RealMatrix vec
                        = new Array2DRowRealMatrix(MathArrays.ebeSubtract(data[i], newMeans[j]));
                    final RealMatrix dataCov
                        = vec.multiply(vec.transpose()).scalarMultiply(gamma[i][j]);
                    newCovMats[j] = newCovMats[j].add(dataCov);
                }
            }

            // Converting to arrays for use by fitted model
            final double[][][] newCovMatArrays = new double[k][numCols][numCols];
            for (int j = 0; j < k; j++) {
                newCovMats[j] = newCovMats[j].scalarMultiply(1d / gammaSums[j]);
                newCovMatArrays[j] = newCovMats[j].getData();
            }

            // Update current model
            fittedModel = new MixtureMultivariateNormalDistribution(newWeights,
                                                                    newMeans,
                                                                    newCovMatArrays);
        }

        if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
            // Did not converge before the maximum number of iterations
            throw new ConvergenceException();
        }
    }

    /**
     * Fit a mixture model to the data supplied to the constructor, using the
     * default iteration limit (1000) and convergence threshold (1E-5).
     *
     * The quality of the fit depends on the concavity of the data provided to
     * the constructor and the initial mixture provided to this function. If the
     * data has many local optima, multiple runs of the fitting function with
     * different initial mixtures may be required to find the optimal solution.
     * If a SingularMatrixException is encountered, it is possible that another
     * initialization would work.
     *
     * @param initialMixture Model containing initial values of weights and
     *            multivariate normals
     * @throws SingularMatrixException if any component's covariance matrix is
     *             singular during fitting
     * @throws NotStrictlyPositiveException declared because the three-argument
     *             overload declares it; the default iteration limit and
     *             threshold used here satisfy both of its checks
     */
    public void fit(MixtureMultivariateNormalDistribution initialMixture)
        throws SingularMatrixException,
               NotStrictlyPositiveException {
        fit(initialMixture, DEFAULT_MAX_ITERATIONS, DEFAULT_THRESHOLD);
    }

    /**
     * Helper method to create a multivariate normal mixture model which can be
     * used to initialize {@link #fit(MixtureMultivariateNormalDistribution)}.
     *
     * This method uses the supplied data to try to determine a good mixture
     * model at which to start the fit, but it is not guaranteed to supply a
     * model which will find the optimal solution or even converge. The rows are
     * sorted by their mean value and cut into {@code numComponents} consecutive
     * bins; each bin yields one equally weighted component built from the bin's
     * column means and covariance matrix.
     *
     * @param data Data to estimate distribution
     * @param numComponents Number of components for estimated mixture
     * @return Multivariate normal mixture model estimated from the data
     * @throws NumberIsTooLargeException if {@code numComponents} is greater
     * than the number of data rows.
     * @throws NumberIsTooSmallException if {@code numComponents < 2}.
     * @throws NotStrictlyPositiveException if data has less than 2 rows
     * @throws DimensionMismatchException if rows of data have different numbers
     *             of columns
     */
    public static MixtureMultivariateNormalDistribution estimate(final double[][] data,
                                                                 final int numComponents)
        throws NotStrictlyPositiveException,
               DimensionMismatchException {
        if (data.length < 2) {
            throw new NotStrictlyPositiveException(data.length);
        }
        if (numComponents < 2) {
            throw new NumberIsTooSmallException(numComponents, 2, true);
        }
        if (numComponents > data.length) {
            throw new NumberIsTooLargeException(numComponents, data.length, true);
        }

        final int numRows = data.length;
        final int numCols = data[0].length;

        // wrap the rows for sorting, rejecting jagged input as documented
        final DataRow[] sortedData = new DataRow[numRows];
        for (int i = 0; i < numRows; i++) {
            if (data[i].length != numCols) {
                throw new DimensionMismatchException(data[i].length, numCols);
            }
            sortedData[i] = new DataRow(data[i]);
        }
        // sort the data (by row mean, see DataRow.compareTo)
        Arrays.sort(sortedData);

        // uniform weight for each bin
        final double weight = 1d / numComponents;

        // components of mixture model to be created
        final List<Pair<Double, MultivariateNormalDistribution>> components =
                new ArrayList<Pair<Double, MultivariateNormalDistribution>>(numComponents);

        // create a component based on data in each bin
        for (int binIndex = 0; binIndex < numComponents; binIndex++) {
            // minimum index (inclusive) from sorted data for this bin
            final int minIndex = (binIndex * numRows) / numComponents;

            // maximum index (exclusive) from sorted data for this bin
            final int maxIndex = ((binIndex + 1) * numRows) / numComponents;

            // number of data records that will be in this bin
            final int numBinRows = maxIndex - minIndex;

            // data for this bin
            final double[][] binData = new double[numBinRows][numCols];

            // mean of each column for the data in this bin
            final double[] columnMeans = new double[numCols];

            // populate bin and create component
            for (int i = minIndex, iBin = 0; i < maxIndex; i++, iBin++) {
                final double[] row = sortedData[i].getRow();
                for (int j = 0; j < numCols; j++) {
                    final double val = row[j];
                    columnMeans[j] += val;
                    binData[iBin][j] = val;
                }
            }

            MathArrays.scaleInPlace(1d / numBinRows, columnMeans);

            // covariance matrix for this bin
            final double[][] covMat
                = new Covariance(binData).getCovarianceMatrix().getData();
            final MultivariateNormalDistribution mvn
                = new MultivariateNormalDistribution(columnMeans, covMat);

            components.add(new Pair<Double, MultivariateNormalDistribution>(weight, mvn));
        }

        return new MixtureMultivariateNormalDistribution(components);
    }

    /**
     * Gets the log likelihood of the data under the fitted model. The value
     * stored by {@code fit} is the sum of the per-row log densities divided by
     * the number of rows.
     *
     * @return Log likelihood of data or zero if no data has been fit
     */
    public double getLogLikelihood() {
        return logLikelihood;
    }

    /**
     * Gets the fitted model.
     *
     * @return a new mixture built from the components of the fitted model, or
     *         {@code null} if no fit has been performed yet.
     */
    public MixtureMultivariateNormalDistribution getFittedModel() {
        if (fittedModel == null) {
            // Honour the documented contract instead of failing with a
            // NullPointerException on fittedModel.getComponents().
            return null;
        }
        return new MixtureMultivariateNormalDistribution(fittedModel.getComponents());
    }

    /**
     * Class used for sorting user-supplied data.
     *
     * Ordering is by row mean only, while {@code equals} and {@code hashCode}
     * use the row contents, so the natural ordering is not consistent with
     * equals.
     */
    private static class DataRow implements Comparable<DataRow> {
        /** One data row. */
        private final double[] row;
        /** Mean of the data row. */
        private final double mean;

        /**
         * Create a data row.
         * @param data Data to use for the row
         */
        DataRow(final double[] data) {
            // Store reference.
            row = data;
            // Compute mean with a primitive accumulator (no boxing per element).
            double sum = 0d;
            for (int i = 0; i < data.length; i++) {
                sum += data[i];
            }
            mean = sum / data.length;
        }

        /**
         * Compare two data rows.
         * @param other The other row
         * @return int for sorting
         */
        public int compareTo(final DataRow other) {
            return Double.compare(mean, other.mean);
        }

        /** {@inheritDoc} */
        @Override
        public boolean equals(Object other) {

            if (this == other) {
                return true;
            }

            if (other instanceof DataRow) {
                return MathArrays.equals(row, ((DataRow) other).row);
            }

            return false;

        }

        /** {@inheritDoc} */
        @Override
        public int hashCode() {
            return Arrays.hashCode(row);
        }
        /**
         * Get a data row.
         * @return data row array
         */
        public double[] getRow() {
            return row;
        }
    }
}

/*
 * The scraped diff continues at this point with
 * src/main/java/org/apache/commons/math3/distribution/fitting/package-info.java,
 * whose package documentation reads: "Fitting of parameters against distributions."
 */
+ */ +package org.apache.commons.math3.distribution.fitting; diff --git a/src/main/java/org/apache/commons/math3/distribution/package-info.java b/src/main/java/org/apache/commons/math3/distribution/package-info.java new file mode 100644 index 0000000..3a9fbc9 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/distribution/package-info.java @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Implementations of common discrete and continuous distributions. */ +package org.apache.commons.math3.distribution; |