diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java')
-rw-r--r-- | src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java b/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java new file mode 100644 index 0000000..9f19528 --- /dev/null +++ b/src/main/java/org/apache/commons/math/distribution/ZipfDistributionImpl.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math.distribution; + +import java.io.Serializable; + +import org.apache.commons.math.MathRuntimeException; +import org.apache.commons.math.exception.util.LocalizedFormats; +import org.apache.commons.math.util.FastMath; + +/** + * Implementation for the {@link ZipfDistribution}. + * + * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $ + */ +public class ZipfDistributionImpl extends AbstractIntegerDistribution + implements ZipfDistribution, Serializable { + + /** Serializable version identifier. */ + private static final long serialVersionUID = -140627372283420404L; + + /** Number of elements. */ + private int numberOfElements; + + /** Exponent parameter of the distribution. */ + private double exponent; + + /** + * Create a new Zipf distribution with the given number of elements and + * exponent. Both values must be positive; otherwise an + * <code>IllegalArgumentException</code> is thrown. + * + * @param numberOfElements the number of elements + * @param exponent the exponent + * @exception IllegalArgumentException if n ≤ 0 or s ≤ 0.0 + */ + public ZipfDistributionImpl(final int numberOfElements, final double exponent) + throws IllegalArgumentException { + setNumberOfElementsInternal(numberOfElements); + setExponentInternal(exponent); + } + + /** + * Get the number of elements (e.g. corpus size) for the distribution. + * + * @return the number of elements + */ + public int getNumberOfElements() { + return numberOfElements; + } + + /** + * Set the number of elements (e.g. corpus size) for the distribution. + * The parameter value must be positive; otherwise an + * <code>IllegalArgumentException</code> is thrown. + * + * @param n the number of elements + * @exception IllegalArgumentException if n ≤ 0 + * @deprecated as of 2.1 (class will become immutable in 3.0) + */ + @Deprecated + public void setNumberOfElements(final int n) { + setNumberOfElementsInternal(n); + } + /** + * Set the number of elements (e.g. corpus size) for the distribution. + * The parameter value must be positive; otherwise an + * <code>IllegalArgumentException</code> is thrown. + * + * @param n the number of elements + * @exception IllegalArgumentException if n ≤ 0 + */ + private void setNumberOfElementsInternal(final int n) + throws IllegalArgumentException { + if (n <= 0) { + throw MathRuntimeException.createIllegalArgumentException( + LocalizedFormats.INSUFFICIENT_DIMENSION, n, 0); + } + this.numberOfElements = n; + } + + /** + * Get the exponent characterising the distribution. + * + * @return the exponent + */ + public double getExponent() { + return exponent; + } + + /** + * Set the exponent characterising the distribution. + * The parameter value must be positive; otherwise an + * <code>IllegalArgumentException</code> is thrown. + * + * @param s the exponent + * @exception IllegalArgumentException if s ≤ 0.0 + * @deprecated as of 2.1 (class will become immutable in 3.0) + */ + @Deprecated + public void setExponent(final double s) { + setExponentInternal(s); + } + + /** + * Set the exponent characterising the distribution. + * The parameter value must be positive; otherwise an + * <code>IllegalArgumentException</code> is thrown. + * + * @param s the exponent + * @exception IllegalArgumentException if s ≤ 0.0 + */ + private void setExponentInternal(final double s) + throws IllegalArgumentException { + if (s <= 0.0) { + throw MathRuntimeException.createIllegalArgumentException( + LocalizedFormats.NOT_POSITIVE_EXPONENT, + s); + } + this.exponent = s; + } + + /** + * The probability mass function P(X = x) for a Zipf distribution. + * + * @param x the value at which the probability density function is evaluated. + * @return the value of the probability mass function at x + */ + public double probability(final int x) { + if (x <= 0 || x > numberOfElements) { + return 0.0; + } + + return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); + + } + + /** + * The probability distribution function P(X <= x) for a Zipf distribution. + * + * @param x the value at which the PDF is evaluated. + * @return Zipf distribution function evaluated at x + */ + @Override + public double cumulativeProbability(final int x) { + if (x <= 0) { + return 0.0; + } else if (x >= numberOfElements) { + return 1.0; + } + + return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); + + } + + /** + * Access the domain value lower bound, based on <code>p</code>, used to + * bracket a PDF root. + * + * @param p the desired probability for the critical value + * @return domain value lower bound, i.e. + * P(X < <i>lower bound</i>) < <code>p</code> + */ + @Override + protected int getDomainLowerBound(final double p) { + return 0; + } + + /** + * Access the domain value upper bound, based on <code>p</code>, used to + * bracket a PDF root. + * + * @param p the desired probability for the critical value + * @return domain value upper bound, i.e. + * P(X < <i>upper bound</i>) > <code>p</code> + */ + @Override + protected int getDomainUpperBound(final double p) { + return numberOfElements; + } + + + /** + * Calculates the Nth generalized harmonic number. See + * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic + * Series</a>. + * + * @param n the term in the series to calculate (must be ≥ 1) + * @param m the exponent; special case m == 1.0 is the harmonic series + * @return the nth generalized harmonic number + */ + private double generalizedHarmonic(final int n, final double m) { + double value = 0; + for (int k = n; k > 0; --k) { + value += 1.0 / FastMath.pow(k, m); + } + return value; + } + + /** + * Returns the lower bound of the support for the distribution. + * + * The lower bound of the support is always 1 no matter the parameters. + * + * @return lower bound of the support (always 1) + * @since 2.2 + */ + public int getSupportLowerBound() { + return 1; + } + + /** + * Returns the upper bound of the support for the distribution. + * + * The upper bound of the support is the number of elements + * + * @return upper bound of the support + * @since 2.2 + */ + public int getSupportUpperBound() { + return getNumberOfElements(); + } + + /** + * Returns the mean. + * + * For number of elements N and exponent s, the mean is + * <code>Hs1 / Hs</code> where + * <ul> + * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> + * <li><code>Hs = generalizedHarmonic(N, s)</code></li> + * </ul> + * + * @return the mean + * @since 2.2 + */ + protected double getNumericalMean() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return Hs1 / Hs; + } + + /** + * Returns the variance. + * + * For number of elements N and exponent s, the mean is + * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where + * <ul> + * <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li> + * <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li> + * <li><code>Hs = generalizedHarmonic(N, s)</code></li> + * </ul> + * + * @return the variance + * @since 2.2 + */ + protected double getNumericalVariance() { + final int N = getNumberOfElements(); + final double s = getExponent(); + + final double Hs2 = generalizedHarmonic(N, s - 2); + final double Hs1 = generalizedHarmonic(N, s - 1); + final double Hs = generalizedHarmonic(N, s); + + return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); + } +} |