summaryrefslogtreecommitdiff
path: root/src/main/java/org/apache/commons/math3/stat/regression
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/java/org/apache/commons/math3/stat/regression')
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/AbstractMultipleLinearRegression.java383
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/GLSMultipleLinearRegression.java135
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/MillerUpdatingRegression.java1101
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/ModelSpecificationException.java40
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/MultipleLinearRegression.java69
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/OLSMultipleLinearRegression.java285
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/RegressionResults.java421
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java881
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/UpdatingMultipleLinearRegression.java93
-rw-r--r--src/main/java/org/apache/commons/math3/stat/regression/package-info.java22
10 files changed, 3430 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/AbstractMultipleLinearRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/AbstractMultipleLinearRegression.java
new file mode 100644
index 0000000..9b7c40a
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/AbstractMultipleLinearRegression.java
@@ -0,0 +1,383 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import org.apache.commons.math3.exception.DimensionMismatchException;
+import org.apache.commons.math3.exception.InsufficientDataException;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NoDataException;
+import org.apache.commons.math3.exception.NullArgumentException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.linear.NonSquareMatrixException;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.RealVector;
+import org.apache.commons.math3.linear.ArrayRealVector;
+import org.apache.commons.math3.stat.descriptive.moment.Variance;
+import org.apache.commons.math3.util.FastMath;
+
+/**
+ * Abstract base class for implementations of MultipleLinearRegression.
+ * @since 2.0
+ */
+public abstract class AbstractMultipleLinearRegression implements
+ MultipleLinearRegression {
+
+ /** X sample data. */
+ private RealMatrix xMatrix;
+
+ /** Y sample data. */
+ private RealVector yVector;
+
+ /** Whether or not the regression model includes an intercept. True means no intercept. */
+ private boolean noIntercept = false;
+
+ /**
+ * @return the X sample data.
+ */
+ protected RealMatrix getX() {
+ return xMatrix;
+ }
+
+ /**
+ * @return the Y sample data.
+ */
+ protected RealVector getY() {
+ return yVector;
+ }
+
+ /**
+ * @return true if the model has no intercept term; false otherwise
+ * @since 2.2
+ */
+ public boolean isNoIntercept() {
+ return noIntercept;
+ }
+
+ /**
+ * @param noIntercept true means the model is to be estimated without an intercept term
+ * @since 2.2
+ */
+ public void setNoIntercept(boolean noIntercept) {
+ this.noIntercept = noIntercept;
+ }
+
+ /**
+ * <p>Loads model x and y sample data from a flat input array, overriding any previous sample.
+ * </p>
+ * <p>Assumes that rows are concatenated with y values first in each row. For example, an input
+ * <code>data</code> array containing the sequence of values (1, 2, 3, 4, 5, 6, 7, 8, 9) with
+ * <code>nobs = 3</code> and <code>nvars = 2</code> creates a regression dataset with two
+ * independent variables, as below:
+ * <pre>
+ * y x[0] x[1]
+ * --------------
+ * 1 2 3
+ * 4 5 6
+ * 7 8 9
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term. If {@link #isNoIntercept()} is <code>true</code>,
+ * the X matrix will be created without an initial column of "1"s; otherwise this column will
+ * be added.
+ * </p>
+ * <p>Throws IllegalArgumentException if any of the following preconditions fail:
+ * <ul><li><code>data</code> cannot be null</li>
+ * <li><code>data.length = nobs * (nvars + 1)</li>
+ * <li><code>nobs > nvars</code></li></ul>
+ * </p>
+ *
+ * @param data input data array
+ * @param nobs number of observations (rows)
+ * @param nvars number of independent variables (columns, not counting y)
+ * @throws NullArgumentException if the data array is null
+ * @throws DimensionMismatchException if the length of the data array is not equal
+ * to <code>nobs * (nvars + 1)</code>
+ * @throws InsufficientDataException if <code>nobs</code> is less than
+ * <code>nvars + 1</code>
+ */
+ public void newSampleData(double[] data, int nobs, int nvars) {
+ if (data == null) {
+ throw new NullArgumentException();
+ }
+ if (data.length != nobs * (nvars + 1)) {
+ throw new DimensionMismatchException(data.length, nobs * (nvars + 1));
+ }
+ if (nobs <= nvars) {
+ throw new InsufficientDataException(LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, nobs, nvars + 1);
+ }
+ double[] y = new double[nobs];
+ final int cols = noIntercept ? nvars: nvars + 1;
+ double[][] x = new double[nobs][cols];
+ int pointer = 0;
+ for (int i = 0; i < nobs; i++) {
+ y[i] = data[pointer++];
+ if (!noIntercept) {
+ x[i][0] = 1.0d;
+ }
+ for (int j = noIntercept ? 0 : 1; j < cols; j++) {
+ x[i][j] = data[pointer++];
+ }
+ }
+ this.xMatrix = new Array2DRowRealMatrix(x);
+ this.yVector = new ArrayRealVector(y);
+ }
+
+ /**
+ * Loads new y sample data, overriding any previous data.
+ *
+ * @param y the array representing the y sample
+ * @throws NullArgumentException if y is null
+ * @throws NoDataException if y is empty
+ */
+ protected void newYSampleData(double[] y) {
+ if (y == null) {
+ throw new NullArgumentException();
+ }
+ if (y.length == 0) {
+ throw new NoDataException();
+ }
+ this.yVector = new ArrayRealVector(y);
+ }
+
+ /**
+ * <p>Loads new x sample data, overriding any previous data.
+ * </p>
+ * The input <code>x</code> array should have one row for each sample
+ * observation, with columns corresponding to independent variables.
+ * For example, if <pre>
+ * <code> x = new double[][] {{1, 2}, {3, 4}, {5, 6}} </code></pre>
+ * then <code>setXSampleData(x) </code> results in a model with two independent
+ * variables and 3 observations:
+ * <pre>
+ * x[0] x[1]
+ * ----------
+ * 1 2
+ * 3 4
+ * 5 6
+ * </pre>
+ * </p>
+ * <p>Note that there is no need to add an initial unitary column (column of 1's) when
+ * specifying a model including an intercept term.
+ * </p>
+ * @param x the rectangular array representing the x sample
+ * @throws NullArgumentException if x is null
+ * @throws NoDataException if x is empty
+ * @throws DimensionMismatchException if x is not rectangular
+ */
+ protected void newXSampleData(double[][] x) {
+ if (x == null) {
+ throw new NullArgumentException();
+ }
+ if (x.length == 0) {
+ throw new NoDataException();
+ }
+ if (noIntercept) {
+ this.xMatrix = new Array2DRowRealMatrix(x, true);
+ } else { // Augment design matrix with initial unitary column
+ final int nVars = x[0].length;
+ final double[][] xAug = new double[x.length][nVars + 1];
+ for (int i = 0; i < x.length; i++) {
+ if (x[i].length != nVars) {
+ throw new DimensionMismatchException(x[i].length, nVars);
+ }
+ xAug[i][0] = 1.0d;
+ System.arraycopy(x[i], 0, xAug[i], 1, nVars);
+ }
+ this.xMatrix = new Array2DRowRealMatrix(xAug, false);
+ }
+ }
+
+ /**
+ * Validates sample data. Checks that
+ * <ul><li>Neither x nor y is null or empty;</li>
+ * <li>The length (i.e. number of rows) of x equals the length of y</li>
+ * <li>x has at least one more row than it has columns (i.e. there is
+ * sufficient data to estimate regression coefficients for each of the
+ * columns in x plus an intercept.</li>
+ * </ul>
+ *
+ * @param x the [n,k] array representing the x data
+ * @param y the [n,1] array representing the y data
+ * @throws NullArgumentException if {@code x} or {@code y} is null
+ * @throws DimensionMismatchException if {@code x} and {@code y} do not
+ * have the same length
+ * @throws NoDataException if {@code x} or {@code y} are zero-length
+ * @throws MathIllegalArgumentException if the number of rows of {@code x}
+ * is not larger than the number of columns + 1
+ */
+ protected void validateSampleData(double[][] x, double[] y) throws MathIllegalArgumentException {
+ if ((x == null) || (y == null)) {
+ throw new NullArgumentException();
+ }
+ if (x.length != y.length) {
+ throw new DimensionMismatchException(y.length, x.length);
+ }
+ if (x.length == 0) { // Must be no y data either
+ throw new NoDataException();
+ }
+ if (x[0].length + 1 > x.length) {
+ throw new MathIllegalArgumentException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ x.length, x[0].length);
+ }
+ }
+
+ /**
+ * Validates that the x data and covariance matrix have the same
+ * number of rows and that the covariance matrix is square.
+ *
+ * @param x the [n,k] array representing the x sample
+ * @param covariance the [n,n] array representing the covariance matrix
+ * @throws DimensionMismatchException if the number of rows in x is not equal
+ * to the number of rows in covariance
+ * @throws NonSquareMatrixException if the covariance matrix is not square
+ */
+ protected void validateCovarianceData(double[][] x, double[][] covariance) {
+ if (x.length != covariance.length) {
+ throw new DimensionMismatchException(x.length, covariance.length);
+ }
+ if (covariance.length > 0 && covariance.length != covariance[0].length) {
+ throw new NonSquareMatrixException(covariance.length, covariance[0].length);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateRegressionParameters() {
+ RealVector b = calculateBeta();
+ return b.toArray();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateResiduals() {
+ RealVector b = calculateBeta();
+ RealVector e = yVector.subtract(xMatrix.operate(b));
+ return e.toArray();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[][] estimateRegressionParametersVariance() {
+ return calculateBetaVariance().getData();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double[] estimateRegressionParametersStandardErrors() {
+ double[][] betaVariance = estimateRegressionParametersVariance();
+ double sigma = calculateErrorVariance();
+ int length = betaVariance[0].length;
+ double[] result = new double[length];
+ for (int i = 0; i < length; i++) {
+ result[i] = FastMath.sqrt(sigma * betaVariance[i][i]);
+ }
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public double estimateRegressandVariance() {
+ return calculateYVariance();
+ }
+
+ /**
+ * Estimates the variance of the error.
+ *
+ * @return estimate of the error variance
+ * @since 2.2
+ */
+ public double estimateErrorVariance() {
+ return calculateErrorVariance();
+
+ }
+
+ /**
+ * Estimates the standard error of the regression.
+ *
+ * @return regression standard error
+ * @since 2.2
+ */
+ public double estimateRegressionStandardError() {
+ return FastMath.sqrt(estimateErrorVariance());
+ }
+
+ /**
+ * Calculates the beta of multiple linear regression in matrix notation.
+ *
+ * @return beta
+ */
+ protected abstract RealVector calculateBeta();
+
+ /**
+ * Calculates the beta variance of multiple linear regression in matrix
+ * notation.
+ *
+ * @return beta variance
+ */
+ protected abstract RealMatrix calculateBetaVariance();
+
+
+ /**
+ * Calculates the variance of the y values.
+ *
+ * @return Y variance
+ */
+ protected double calculateYVariance() {
+ return new Variance().evaluate(yVector.toArray());
+ }
+
+ /**
+ * <p>Calculates the variance of the error term.</p>
+ * Uses the formula <pre>
+ * var(u) = u &middot; u / (n - k)
+ * </pre>
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance estimate
+ * @since 2.2
+ */
+ protected double calculateErrorVariance() {
+ RealVector residuals = calculateResiduals();
+ return residuals.dotProduct(residuals) /
+ (xMatrix.getRowDimension() - xMatrix.getColumnDimension());
+ }
+
+ /**
+ * Calculates the residuals of multiple linear regression in matrix
+ * notation.
+ *
+ * <pre>
+ * u = y - X * b
+ * </pre>
+ *
+ * @return The residuals [n,1] matrix
+ */
+ protected RealVector calculateResiduals() {
+ RealVector b = calculateBeta();
+ return yVector.subtract(xMatrix.operate(b));
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/GLSMultipleLinearRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/GLSMultipleLinearRegression.java
new file mode 100644
index 0000000..1644e6d
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/GLSMultipleLinearRegression.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import org.apache.commons.math3.linear.LUDecomposition;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.RealVector;
+
+/**
+ * The GLS implementation of multiple linear regression.
+ *
+ * GLS assumes a general covariance matrix Omega of the error
+ * <pre>
+ * u ~ N(0, Omega)
+ * </pre>
+ *
+ * Estimated by GLS,
+ * <pre>
+ * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+ * </pre>
+ * whose variance is
+ * <pre>
+ * Var(b)=(X' Omega^-1 X)^-1
+ * </pre>
+ * @since 2.0
+ */
+public class GLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+ /** Covariance matrix. */
+ private RealMatrix Omega;
+
+ /** Inverse of covariance matrix. */
+ private RealMatrix OmegaInverse;
+
+ /** Replace sample data, overriding any previous sample.
+ * @param y y values of the sample
+ * @param x x values of the sample
+ * @param covariance array representing the covariance matrix
+ */
+ public void newSampleData(double[] y, double[][] x, double[][] covariance) {
+ validateSampleData(x, y);
+ newYSampleData(y);
+ newXSampleData(x);
+ validateCovarianceData(x, covariance);
+ newCovarianceData(covariance);
+ }
+
+ /**
+ * Add the covariance data.
+ *
+ * @param omega the [n,n] array representing the covariance
+ */
+ protected void newCovarianceData(double[][] omega){
+ this.Omega = new Array2DRowRealMatrix(omega);
+ this.OmegaInverse = null;
+ }
+
+ /**
+ * Get the inverse of the covariance.
+ * <p>The inverse of the covariance matrix is lazily evaluated and cached.</p>
+ * @return inverse of the covariance
+ */
+ protected RealMatrix getOmegaInverse() {
+ if (OmegaInverse == null) {
+ OmegaInverse = new LUDecomposition(Omega).getSolver().getInverse();
+ }
+ return OmegaInverse;
+ }
+
+ /**
+ * Calculates beta by GLS.
+ * <pre>
+ * b=(X' Omega^-1 X)^-1X'Omega^-1 y
+ * </pre>
+ * @return beta
+ */
+ @Override
+ protected RealVector calculateBeta() {
+ RealMatrix OI = getOmegaInverse();
+ RealMatrix XT = getX().transpose();
+ RealMatrix XTOIX = XT.multiply(OI).multiply(getX());
+ RealMatrix inverse = new LUDecomposition(XTOIX).getSolver().getInverse();
+ return inverse.multiply(XT).multiply(OI).operate(getY());
+ }
+
+ /**
+ * Calculates the variance on the beta.
+ * <pre>
+ * Var(b)=(X' Omega^-1 X)^-1
+ * </pre>
+ * @return The beta variance matrix
+ */
+ @Override
+ protected RealMatrix calculateBetaVariance() {
+ RealMatrix OI = getOmegaInverse();
+ RealMatrix XTOIX = getX().transpose().multiply(OI).multiply(getX());
+ return new LUDecomposition(XTOIX).getSolver().getInverse();
+ }
+
+
+ /**
+ * Calculates the estimated variance of the error term using the formula
+ * <pre>
+ * Var(u) = Tr(u' Omega^-1 u)/(n-k)
+ * </pre>
+ * where n and k are the row and column dimensions of the design
+ * matrix X.
+ *
+ * @return error variance
+ * @since 2.2
+ */
+ @Override
+ protected double calculateErrorVariance() {
+ RealVector residuals = calculateResiduals();
+ double t = residuals.dotProduct(getOmegaInverse().operate(residuals));
+ return t / (getX().getRowDimension() - getX().getColumnDimension());
+
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/MillerUpdatingRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/MillerUpdatingRegression.java
new file mode 100644
index 0000000..3fe3c03
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/MillerUpdatingRegression.java
@@ -0,0 +1,1101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import java.util.Arrays;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.util.FastMath;
+import org.apache.commons.math3.util.Precision;
+import org.apache.commons.math3.util.MathArrays;
+
+/**
+ * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
+ *
+ * <p>The algorithm is described in: <pre>
+ * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
+ * Author(s): Alan J. Miller
+ * Source: Journal of the Royal Statistical Society.
+ * Series C (Applied Statistics), Vol. 41, No. 2
+ * (1992), pp. 458-478
+ * Published by: Blackwell Publishing for the Royal Statistical Society
+ * Stable URL: http://www.jstor.org/stable/2347583 </pre></p>
+ *
+ * <p>This method for multiple regression forms the solution to the OLS problem
+ * by updating the QR decomposition as described by Gentleman.</p>
+ *
+ * @since 3.0
+ */
+public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
+
+ /** number of variables in regression */
+ private final int nvars;
+ /** diagonals of cross products matrix */
+ private final double[] d;
+ /** the elements of the R`Y */
+ private final double[] rhs;
+ /** the off diagonal portion of the R matrix */
+ private final double[] r;
+ /** the tolerance for each of the variables */
+ private final double[] tol;
+ /** residual sum of squares for all nested regressions */
+ private final double[] rss;
+ /** order of the regressors */
+ private final int[] vorder;
+ /** scratch space for tolerance calc */
+ private final double[] work_tolset;
+ /** number of observations entered */
+ private long nobs = 0;
+ /** sum of squared errors of largest regression */
+ private double sserr = 0.0;
+ /** has rss been called? */
+ private boolean rss_set = false;
+ /** has the tolerance setting method been called */
+ private boolean tol_set = false;
+ /** flags for variables with linear dependency problems */
+ private final boolean[] lindep;
+ /** singular x values */
+ private final double[] x_sing;
+ /** workspace for singularity method */
+ private final double[] work_sing;
+ /** summation of Y variable */
+ private double sumy = 0.0;
+ /** summation of squared Y values */
+ private double sumsqy = 0.0;
+ /** boolean flag whether a regression constant is added */
+ private boolean hasIntercept;
+ /** zero tolerance */
+ private final double epsilon;
+ /**
+ * Set the default constructor to private access
+ * to prevent inadvertent instantiation
+ */
+ @SuppressWarnings("unused")
+ private MillerUpdatingRegression() {
+ this(-1, false, Double.NaN);
+ }
+
+ /**
+ * This is the augmented constructor for the MillerUpdatingRegression class.
+ *
+ * @param numberOfVariables number of regressors to expect, not including constant
+ * @param includeConstant include a constant automatically
+ * @param errorTolerance zero tolerance, how machine zero is determined
+ * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
+ */
+ public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
+ throws ModelSpecificationException {
+ if (numberOfVariables < 1) {
+ throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
+ }
+ if (includeConstant) {
+ this.nvars = numberOfVariables + 1;
+ } else {
+ this.nvars = numberOfVariables;
+ }
+ this.hasIntercept = includeConstant;
+ this.nobs = 0;
+ this.d = new double[this.nvars];
+ this.rhs = new double[this.nvars];
+ this.r = new double[this.nvars * (this.nvars - 1) / 2];
+ this.tol = new double[this.nvars];
+ this.rss = new double[this.nvars];
+ this.vorder = new int[this.nvars];
+ this.x_sing = new double[this.nvars];
+ this.work_sing = new double[this.nvars];
+ this.work_tolset = new double[this.nvars];
+ this.lindep = new boolean[this.nvars];
+ for (int i = 0; i < this.nvars; i++) {
+ vorder[i] = i;
+ }
+ if (errorTolerance > 0) {
+ this.epsilon = errorTolerance;
+ } else {
+ this.epsilon = -errorTolerance;
+ }
+ }
+
+ /**
+ * Primary constructor for the MillerUpdatingRegression.
+ *
+ * @param numberOfVariables maximum number of potential regressors
+ * @param includeConstant include a constant automatically
+ * @throws ModelSpecificationException if {@code numberOfVariables is less than 1}
+ */
+ public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
+ throws ModelSpecificationException {
+ this(numberOfVariables, includeConstant, Precision.EPSILON);
+ }
+
+ /**
+ * A getter method which determines whether a constant is included.
+ * @return true regression has an intercept, false no intercept
+ */
+ public boolean hasIntercept() {
+ return this.hasIntercept;
+ }
+
+ /**
+ * Gets the number of observations added to the regression model.
+ * @return number of observations
+ */
+ public long getN() {
+ return this.nobs;
+ }
+
+ /**
+ * Adds an observation to the regression model.
+ * @param x the array with regressor values
+ * @param y the value of dependent variable given these regressors
+ * @exception ModelSpecificationException if the length of {@code x} does not equal
+ * the number of independent variables in the model
+ */
+ public void addObservation(final double[] x, final double y)
+ throws ModelSpecificationException {
+
+ if ((!this.hasIntercept && x.length != nvars) ||
+ (this.hasIntercept && x.length + 1 != nvars)) {
+ throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
+ x.length, nvars);
+ }
+ if (!this.hasIntercept) {
+ include(MathArrays.copyOf(x, x.length), 1.0, y);
+ } else {
+ final double[] tmp = new double[x.length + 1];
+ System.arraycopy(x, 0, tmp, 1, x.length);
+ tmp[0] = 1.0;
+ include(tmp, 1.0, y);
+ }
+ ++nobs;
+
+ }
+
+ /**
+ * Adds multiple observations to the model.
+ * @param x observations on the regressors
+ * @param y observations on the regressand
+ * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
+ * the length of {@code y} or does not contain sufficient data to estimate the model
+ */
+ public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
+ if ((x == null) || (y == null) || (x.length != y.length)) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
+ (x == null) ? 0 : x.length,
+ (y == null) ? 0 : y.length);
+ }
+ if (x.length == 0) { // Must be no y data either
+ throw new ModelSpecificationException(
+ LocalizedFormats.NO_DATA);
+ }
+ if (x[0].length + 1 > x.length) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ x.length, x[0].length);
+ }
+ for (int i = 0; i < x.length; i++) {
+ addObservation(x[i], y[i]);
+ }
+ }
+
+ /**
+ * The include method is where the QR decomposition occurs. This statement forms all
+ * intermediate data which will be used for all derivative measures.
+ * According to the miller paper, note that in the original implementation the x vector
+ * is overwritten. In this implementation, the include method is passed a copy of the
+ * original data vector so that there is no contamination of the data. Additionally,
+ * this method differs slightly from Gentleman's method, in that the assumption is
+ * of dense design matrices, there is some advantage in using the original gentleman algorithm
+ * on sparse matrices.
+ *
+ * @param x observations on the regressors
+ * @param wi weight of the this observation (-1,1)
+ * @param yi observation on the regressand
+ */
+ private void include(final double[] x, final double wi, final double yi) {
+ int nextr = 0;
+ double w = wi;
+ double y = yi;
+ double xi;
+ double di;
+ double wxi;
+ double dpi;
+ double xk;
+ double _w;
+ this.rss_set = false;
+ sumy = smartAdd(yi, sumy);
+ sumsqy = smartAdd(sumsqy, yi * yi);
+ for (int i = 0; i < x.length; i++) {
+ if (w == 0.0) {
+ return;
+ }
+ xi = x[i];
+
+ if (xi == 0.0) {
+ nextr += nvars - i - 1;
+ continue;
+ }
+ di = d[i];
+ wxi = w * xi;
+ _w = w;
+ if (di != 0.0) {
+ dpi = smartAdd(di, wxi * xi);
+ final double tmp = wxi * xi / di;
+ if (FastMath.abs(tmp) > Precision.EPSILON) {
+ w = (di * w) / dpi;
+ }
+ } else {
+ dpi = wxi * xi;
+ w = 0.0;
+ }
+ d[i] = dpi;
+ for (int k = i + 1; k < nvars; k++) {
+ xk = x[k];
+ x[k] = smartAdd(xk, -xi * r[nextr]);
+ if (di != 0.0) {
+ r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
+ } else {
+ r[nextr] = xk / xi;
+ }
+ ++nextr;
+ }
+ xk = y;
+ y = smartAdd(xk, -xi * rhs[i]);
+ if (di != 0.0) {
+ rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
+ } else {
+ rhs[i] = xk / xi;
+ }
+ }
+ sserr = smartAdd(sserr, w * y * y);
+ }
+
+ /**
+ * Adds to number a and b such that the contamination due to
+ * numerical smallness of one addend does not corrupt the sum.
+ * @param a - an addend
+ * @param b - an addend
+ * @return the sum of the a and b
+ */
+ private double smartAdd(double a, double b) {
+ final double _a = FastMath.abs(a);
+ final double _b = FastMath.abs(b);
+ if (_a > _b) {
+ final double eps = _a * Precision.EPSILON;
+ if (_b > eps) {
+ return a + b;
+ }
+ return a;
+ } else {
+ final double eps = _b * Precision.EPSILON;
+ if (_a > eps) {
+ return a + b;
+ }
+ return b;
+ }
+ }
+
+ /**
+ * As the name suggests, clear wipes the internals and reorders everything in the
+ * canonical order.
+ */
+ public void clear() {
+ Arrays.fill(this.d, 0.0);
+ Arrays.fill(this.rhs, 0.0);
+ Arrays.fill(this.r, 0.0);
+ Arrays.fill(this.tol, 0.0);
+ Arrays.fill(this.rss, 0.0);
+ Arrays.fill(this.work_tolset, 0.0);
+ Arrays.fill(this.work_sing, 0.0);
+ Arrays.fill(this.x_sing, 0.0);
+ Arrays.fill(this.lindep, false);
+ for (int i = 0; i < nvars; i++) {
+ this.vorder[i] = i;
+ }
+ this.nobs = 0;
+ this.sserr = 0.0;
+ this.sumy = 0.0;
+ this.sumsqy = 0.0;
+ this.rss_set = false;
+ this.tol_set = false;
+ }
+
+ /**
+ * This sets up tolerances for singularity testing.
+ */
+ private void tolset() {
+ int pos;
+ double total;
+ final double eps = this.epsilon;
+ for (int i = 0; i < nvars; i++) {
+ this.work_tolset[i] = FastMath.sqrt(d[i]);
+ }
+ tol[0] = eps * this.work_tolset[0];
+ for (int col = 1; col < nvars; col++) {
+ pos = col - 1;
+ total = work_tolset[col];
+ for (int row = 0; row < col; row++) {
+ total += FastMath.abs(r[pos]) * work_tolset[row];
+ pos += nvars - row - 2;
+ }
+ tol[col] = eps * total;
+ }
+ tol_set = true;
+ }
+
+ /**
+ * The regcf method conducts the linear regression and extracts the
+ * parameter vector. Notice that the algorithm can do subset regression
+ * with no alteration.
+ *
+ * @param nreq how many of the regressors to include (either in canonical
+ * order, or in the current reordered state)
+ * @return an array with the estimated slope coefficients
+ * @throws ModelSpecificationException if {@code nreq} is less than 1
+ * or greater than the number of independent variables
+ */
+ private double[] regcf(int nreq) throws ModelSpecificationException {
+ int nextr;
+ if (nreq < 1) {
+ throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
+ }
+ if (nreq > this.nvars) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
+ }
+ if (!this.tol_set) {
+ tolset();
+ }
+ final double[] ret = new double[nreq];
+ boolean rankProblem = false;
+ for (int i = nreq - 1; i > -1; i--) {
+ if (FastMath.sqrt(d[i]) < tol[i]) {
+ ret[i] = 0.0;
+ d[i] = 0.0;
+ rankProblem = true;
+ } else {
+ ret[i] = rhs[i];
+ nextr = i * (nvars + nvars - i - 1) / 2;
+ for (int j = i + 1; j < nreq; j++) {
+ ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
+ ++nextr;
+ }
+ }
+ }
+ if (rankProblem) {
+ for (int i = 0; i < nreq; i++) {
+ if (this.lindep[i]) {
+ ret[i] = Double.NaN;
+ }
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * The method which checks for singularities and then eliminates the offending
+ * columns.
+ */
+ private void singcheck() {
+ int pos;
+ for (int i = 0; i < nvars; i++) {
+ work_sing[i] = FastMath.sqrt(d[i]);
+ }
+ for (int col = 0; col < nvars; col++) {
+ // Set elements within R to zero if they are less than tol(col) in
+ // absolute value after being scaled by the square root of their row
+ // multiplier
+ final double temp = tol[col];
+ pos = col - 1;
+ for (int row = 0; row < col - 1; row++) {
+ if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
+ r[pos] = 0.0;
+ }
+ pos += nvars - row - 2;
+ }
+ // If diagonal element is near zero, set it to zero, set appropriate
+ // element of LINDEP, and use INCLUD to augment the projections in
+ // the lower rows of the orthogonalization.
+ lindep[col] = false;
+ if (work_sing[col] < temp) {
+ lindep[col] = true;
+ if (col < nvars - 1) {
+ Arrays.fill(x_sing, 0.0);
+ int _pi = col * (nvars + nvars - col - 1) / 2;
+ for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
+ x_sing[_xi] = r[_pi];
+ r[_pi] = 0.0;
+ }
+ final double y = rhs[col];
+ final double weight = d[col];
+ d[col] = 0.0;
+ rhs[col] = 0.0;
+ this.include(x_sing, weight, y);
+ } else {
+ sserr += d[col] * rhs[col] * rhs[col];
+ }
+ }
+ }
+ }
+
+ /**
+ * Calculates the sum of squared errors for the full regression
+ * and all subsets in the following manner: <pre>
+ * rss[] ={
+ * ResidualSumOfSquares_allNvars,
+ * ResidualSumOfSquares_FirstNvars-1,
+ * ResidualSumOfSquares_FirstNvars-2,
+ * ..., ResidualSumOfSquares_FirstVariable} </pre>
+ */
+ private void ss() {
+ double total = sserr;
+ rss[nvars - 1] = sserr;
+ for (int i = nvars - 1; i > 0; i--) {
+ total += d[i] * rhs[i] * rhs[i];
+ rss[i - 1] = total;
+ }
+ rss_set = true;
+ }
+
+ /**
+ * Calculates the cov matrix assuming only the first nreq variables are
+ * included in the calculation. The returned array contains a symmetric
+ * matrix stored in lower triangular form. The matrix will have
+ * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
+ * cov =
+ * {
+ * cov_00,
+ * cov_10, cov_11,
+ * cov_20, cov_21, cov22,
+ * ...
+ * } </pre>
+ *
+ * @param nreq how many of the regressors to include (either in canonical
+ * order, or in the current reordered state)
+ * @return an array with the variance covariance of the included
+ * regressors in lower triangular form
+ */
+ private double[] cov(int nreq) {
+ if (this.nobs <= nreq) {
+ return null;
+ }
+ double rnk = 0.0;
+ for (int i = 0; i < nreq; i++) {
+ if (!this.lindep[i]) {
+ rnk += 1.0;
+ }
+ }
+ final double var = rss[nreq - 1] / (nobs - rnk);
+ final double[] rinv = new double[nreq * (nreq - 1) / 2];
+ inverse(rinv, nreq);
+ final double[] covmat = new double[nreq * (nreq + 1) / 2];
+ Arrays.fill(covmat, Double.NaN);
+ int pos2;
+ int pos1;
+ int start = 0;
+ double total = 0;
+ for (int row = 0; row < nreq; row++) {
+ pos2 = start;
+ if (!this.lindep[row]) {
+ for (int col = row; col < nreq; col++) {
+ if (!this.lindep[col]) {
+ pos1 = start + col - row;
+ if (row == col) {
+ total = 1.0 / d[col];
+ } else {
+ total = rinv[pos1 - 1] / d[col];
+ }
+ for (int k = col + 1; k < nreq; k++) {
+ if (!this.lindep[k]) {
+ total += rinv[pos1] * rinv[pos2] / d[k];
+ }
+ ++pos1;
+ ++pos2;
+ }
+ covmat[ (col + 1) * col / 2 + row] = total * var;
+ } else {
+ pos2 += nreq - col - 1;
+ }
+ }
+ }
+ start += nreq - row - 1;
+ }
+ return covmat;
+ }
+
+ /**
+ * This internal method calculates the inverse of the upper-triangular portion
+ * of the R matrix.
+ * @param rinv the storage for the inverse of r
+ * @param nreq how many of the regressors to include (either in canonical
+ * order, or in the current reordered state)
+ */
+ private void inverse(double[] rinv, int nreq) {
+ int pos = nreq * (nreq - 1) / 2 - 1;
+ int pos1 = -1;
+ int pos2 = -1;
+ double total = 0.0;
+ Arrays.fill(rinv, Double.NaN);
+ for (int row = nreq - 1; row > 0; --row) {
+ if (!this.lindep[row]) {
+ final int start = (row - 1) * (nvars + nvars - row) / 2;
+ for (int col = nreq; col > row; --col) {
+ pos1 = start;
+ pos2 = pos;
+ total = 0.0;
+ for (int k = row; k < col - 1; k++) {
+ pos2 += nreq - k - 1;
+ if (!this.lindep[k]) {
+ total += -r[pos1] * rinv[pos2];
+ }
+ ++pos1;
+ }
+ rinv[pos] = total - r[pos1];
+ --pos;
+ }
+ } else {
+ pos -= nreq - row;
+ }
+ }
+ }
+
+ /**
+ * In the original algorithm only the partial correlations of the regressors
+ * is returned to the user. In this implementation, we have <pre>
+ * corr =
+ * {
+ * corrxx - lower triangular
+ * corrxy - bottom row of the matrix
+ * }
+ * Replaces subroutines PCORR and COR of:
+ * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
+ *
+ * <p>Calculate partial correlations after the variables in rows
+ * 1, 2, ..., IN have been forced into the regression.
+ * If IN = 1, and the first row of R represents a constant in the
+ * model, then the usual simple correlations are returned.</p>
+ *
+ * <p>If IN = 0, the value returned in array CORMAT for the correlation
+ * of variables Xi & Xj is: <pre>
+ * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
+ *
+ * <p>On return, array CORMAT contains the upper triangle of the matrix of
+ * partial correlations stored by rows, excluding the 1's on the diagonal.
+ * e.g. if IN = 2, the consecutive elements returned are:
+ * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
+ * Array YCORR stores the partial correlations with the Y-variable
+ * starting with YCORR(IN+1) = partial correlation with the variable in
+ * position (IN+1). </p>
+ *
+ * @param in how many of the regressors to include (either in canonical
+ * order, or in the current reordered state)
+ * @return an array with the partial correlations of the remainder of
+ * regressors with each other and the regressand, in lower triangular form
+ */
+ public double[] getPartialCorrelations(int in) {
+ final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
+ int pos;
+ int pos1;
+ int pos2;
+ final int rms_off = -in;
+ final int wrk_off = -(in + 1);
+ final double[] rms = new double[nvars - in];
+ final double[] work = new double[nvars - in - 1];
+ double sumxx;
+ double sumxy;
+ double sumyy;
+ final int offXX = (nvars - in) * (nvars - in - 1) / 2;
+ if (in < -1 || in >= nvars) {
+ return null;
+ }
+ final int nvm = nvars - 1;
+ final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
+ if (d[in] > 0.0) {
+ rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
+ }
+ for (int col = in + 1; col < nvars; col++) {
+ pos = base_pos + col - 1 - in;
+ sumxx = d[col];
+ for (int row = in; row < col; row++) {
+ sumxx += d[row] * r[pos] * r[pos];
+ pos += nvars - row - 2;
+ }
+ if (sumxx > 0.0) {
+ rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
+ } else {
+ rms[col + rms_off] = 0.0;
+ }
+ }
+ sumyy = sserr;
+ for (int row = in; row < nvars; row++) {
+ sumyy += d[row] * rhs[row] * rhs[row];
+ }
+ if (sumyy > 0.0) {
+ sumyy = 1.0 / FastMath.sqrt(sumyy);
+ }
+ pos = 0;
+ for (int col1 = in; col1 < nvars; col1++) {
+ sumxy = 0.0;
+ Arrays.fill(work, 0.0);
+ pos1 = base_pos + col1 - in - 1;
+ for (int row = in; row < col1; row++) {
+ pos2 = pos1 + 1;
+ for (int col2 = col1 + 1; col2 < nvars; col2++) {
+ work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
+ pos2++;
+ }
+ sumxy += d[row] * r[pos1] * rhs[row];
+ pos1 += nvars - row - 2;
+ }
+ pos2 = pos1 + 1;
+ for (int col2 = col1 + 1; col2 < nvars; col2++) {
+ work[col2 + wrk_off] += d[col1] * r[pos2];
+ ++pos2;
+ output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
+ work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
+ ++pos;
+ }
+ sumxy += d[col1] * rhs[col1];
+ output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
+ }
+
+ return output;
+ }
+
+ /**
+ * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
+ * Move variable from position FROM to position TO in an
+ * orthogonal reduction produced by AS75.1.
+ *
+ * @param from initial position
+ * @param to destination
+ */
+ private void vmove(int from, int to) {
+ double d1;
+ double d2;
+ double X;
+ double d1new;
+ double d2new;
+ double cbar;
+ double sbar;
+ double Y;
+ int first;
+ int inc;
+ int m1;
+ int m2;
+ int mp1;
+ int pos;
+ boolean bSkipTo40 = false;
+ if (from == to) {
+ return;
+ }
+ if (!this.rss_set) {
+ ss();
+ }
+ int count = 0;
+ if (from < to) {
+ first = from;
+ inc = 1;
+ count = to - from;
+ } else {
+ first = from - 1;
+ inc = -1;
+ count = from - to;
+ }
+
+ int m = first;
+ int idx = 0;
+ while (idx < count) {
+ m1 = m * (nvars + nvars - m - 1) / 2;
+ m2 = m1 + nvars - m - 1;
+ mp1 = m + 1;
+
+ d1 = d[m];
+ d2 = d[mp1];
+ // Special cases.
+ if (d1 > this.epsilon || d2 > this.epsilon) {
+ X = r[m1];
+ if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
+ X = 0.0;
+ }
+ if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
+ d[m] = d2;
+ d[mp1] = d1;
+ r[m1] = 0.0;
+ for (int col = m + 2; col < nvars; col++) {
+ ++m1;
+ X = r[m1];
+ r[m1] = r[m2];
+ r[m2] = X;
+ ++m2;
+ }
+ X = rhs[m];
+ rhs[m] = rhs[mp1];
+ rhs[mp1] = X;
+ bSkipTo40 = true;
+ //break;
+ } else if (d2 < this.epsilon) {
+ d[m] = d1 * X * X;
+ r[m1] = 1.0 / X;
+ for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
+ r[_i] /= X;
+ }
+ rhs[m] /= X;
+ bSkipTo40 = true;
+ //break;
+ }
+ if (!bSkipTo40) {
+ d1new = d2 + d1 * X * X;
+ cbar = d2 / d1new;
+ sbar = X * d1 / d1new;
+ d2new = d1 * cbar;
+ d[m] = d1new;
+ d[mp1] = d2new;
+ r[m1] = sbar;
+ for (int col = m + 2; col < nvars; col++) {
+ ++m1;
+ Y = r[m1];
+ r[m1] = cbar * r[m2] + sbar * Y;
+ r[m2] = Y - X * r[m2];
+ ++m2;
+ }
+ Y = rhs[m];
+ rhs[m] = cbar * rhs[mp1] + sbar * Y;
+ rhs[mp1] = Y - X * rhs[mp1];
+ }
+ }
+ if (m > 0) {
+ pos = m;
+ for (int row = 0; row < m; row++) {
+ X = r[pos];
+ r[pos] = r[pos - 1];
+ r[pos - 1] = X;
+ pos += nvars - row - 2;
+ }
+ }
+ // Adjust variable order (VORDER), the tolerances (TOL) and
+ // the vector of residual sums of squares (RSS).
+ m1 = vorder[m];
+ vorder[m] = vorder[mp1];
+ vorder[mp1] = m1;
+ X = tol[m];
+ tol[m] = tol[mp1];
+ tol[mp1] = X;
+ rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
+
+ m += inc;
+ ++idx;
+ }
+ }
+
+ /**
+ * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
+ *
+ * <p> Re-order the variables in an orthogonal reduction produced by
+ * AS75.1 so that the N variables in LIST start at position POS1,
+ * though will not necessarily be in the same order as in LIST.
+ * Any variables in VORDER before position POS1 are not moved.
+ * Auxiliary routine called: VMOVE. </p>
+ *
+ * <p>This internal method reorders the regressors.</p>
+ *
+ * @param list the regressors to move
+ * @param pos1 where the list will be placed
+ * @return -1 error, 0 everything ok
+ */
+ private int reorderRegressors(int[] list, int pos1) {
+ int next;
+ int i;
+ int l;
+ if (list.length < 1 || list.length > nvars + 1 - pos1) {
+ return -1;
+ }
+ next = pos1;
+ i = pos1;
+ while (i < nvars) {
+ l = vorder[i];
+ for (int j = 0; j < list.length; j++) {
+ if (l == list[j] && i > next) {
+ this.vmove(i, next);
+ ++next;
+ if (next >= list.length + pos1) {
+ return 0;
+ } else {
+ break;
+ }
+ }
+ }
+ ++i;
+ }
+ return 0;
+ }
+
+ /**
+ * Gets the diagonal of the Hat matrix also known as the leverage matrix.
+ *
+ * @param row_data returns the diagonal of the hat matrix for this observation
+ * @return the diagonal element of the hatmatrix
+ */
+ public double getDiagonalOfHatMatrix(double[] row_data) {
+ double[] wk = new double[this.nvars];
+ int pos;
+ double total;
+
+ if (row_data.length > nvars) {
+ return Double.NaN;
+ }
+ double[] xrow;
+ if (this.hasIntercept) {
+ xrow = new double[row_data.length + 1];
+ xrow[0] = 1.0;
+ System.arraycopy(row_data, 0, xrow, 1, row_data.length);
+ } else {
+ xrow = row_data;
+ }
+ double hii = 0.0;
+ for (int col = 0; col < xrow.length; col++) {
+ if (FastMath.sqrt(d[col]) < tol[col]) {
+ wk[col] = 0.0;
+ } else {
+ pos = col - 1;
+ total = xrow[col];
+ for (int row = 0; row < col; row++) {
+ total = smartAdd(total, -wk[row] * r[pos]);
+ pos += nvars - row - 2;
+ }
+ wk[col] = total;
+ hii = smartAdd(hii, (total * total) / d[col]);
+ }
+ }
+ return hii;
+ }
+
+ /**
+ * Gets the order of the regressors, useful if some type of reordering
+ * has been called. Calling regress with int[]{} args will trigger
+ * a reordering.
+ *
+ * @return int[] with the current order of the regressors
+ */
+ public int[] getOrderOfRegressors(){
+ return MathArrays.copyOf(vorder);
+ }
+
+ /**
+ * Conducts a regression on the data in the model, using all regressors.
+ *
+ * @return RegressionResults the structure holding all regression results
+ * @exception ModelSpecificationException - thrown if number of observations is
+ * less than the number of variables
+ */
+ public RegressionResults regress() throws ModelSpecificationException {
+ return regress(this.nvars);
+ }
+
+ /**
+ * Conducts a regression on the data in the model, using a subset of regressors.
+ *
+ * @param numberOfRegressors many of the regressors to include (either in canonical
+ * order, or in the current reordered state)
+ * @return RegressionResults the structure holding all regression results
+ * @exception ModelSpecificationException - thrown if number of observations is
+ * less than the number of variables or number of regressors requested
+ * is greater than the regressors in the model
+ */
+ public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
+ if (this.nobs <= numberOfRegressors) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ this.nobs, numberOfRegressors);
+ }
+ if( numberOfRegressors > this.nvars ){
+ throw new ModelSpecificationException(
+ LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
+ }
+
+ tolset();
+ singcheck();
+
+ double[] beta = this.regcf(numberOfRegressors);
+
+ ss();
+
+ double[] cov = this.cov(numberOfRegressors);
+
+ int rnk = 0;
+ for (int i = 0; i < this.lindep.length; i++) {
+ if (!this.lindep[i]) {
+ ++rnk;
+ }
+ }
+
+ boolean needsReorder = false;
+ for (int i = 0; i < numberOfRegressors; i++) {
+ if (this.vorder[i] != i) {
+ needsReorder = true;
+ break;
+ }
+ }
+ if (!needsReorder) {
+ return new RegressionResults(
+ beta, new double[][]{cov}, true, this.nobs, rnk,
+ this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
+ } else {
+ double[] betaNew = new double[beta.length];
+ double[] covNew = new double[cov.length];
+
+ int[] newIndices = new int[beta.length];
+ for (int i = 0; i < nvars; i++) {
+ for (int j = 0; j < numberOfRegressors; j++) {
+ if (this.vorder[j] == i) {
+ betaNew[i] = beta[ j];
+ newIndices[i] = j;
+ }
+ }
+ }
+
+ int idx1 = 0;
+ int idx2;
+ int _i;
+ int _j;
+ for (int i = 0; i < beta.length; i++) {
+ _i = newIndices[i];
+ for (int j = 0; j <= i; j++, idx1++) {
+ _j = newIndices[j];
+ if (_i > _j) {
+ idx2 = _i * (_i + 1) / 2 + _j;
+ } else {
+ idx2 = _j * (_j + 1) / 2 + _i;
+ }
+ covNew[idx1] = cov[idx2];
+ }
+ }
+ return new RegressionResults(
+ betaNew, new double[][]{covNew}, true, this.nobs, rnk,
+ this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
+ }
+ }
+
+ /**
+ * Conducts a regression on the data in the model, using regressors in array
+ * Calling this method will change the internal order of the regressors
+ * and care is required in interpreting the hatmatrix.
+ *
+ * @param variablesToInclude array of variables to include in regression
+ * @return RegressionResults the structure holding all regression results
+ * @exception ModelSpecificationException - thrown if number of observations is
+ * less than the number of variables, the number of regressors requested
+ * is greater than the regressors in the model or a regressor index in
+ * regressor array does not exist
+ */
+ public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
+ if (variablesToInclude.length > this.nvars) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
+ }
+ if (this.nobs <= this.nvars) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ this.nobs, this.nvars);
+ }
+ Arrays.sort(variablesToInclude);
+ int iExclude = 0;
+ for (int i = 0; i < variablesToInclude.length; i++) {
+ if (i >= this.nvars) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
+ }
+ if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
+ variablesToInclude[i] = -1;
+ ++iExclude;
+ }
+ }
+ int[] series;
+ if (iExclude > 0) {
+ int j = 0;
+ series = new int[variablesToInclude.length - iExclude];
+ for (int i = 0; i < variablesToInclude.length; i++) {
+ if (variablesToInclude[i] > -1) {
+ series[j] = variablesToInclude[i];
+ ++j;
+ }
+ }
+ } else {
+ series = variablesToInclude;
+ }
+
+ reorderRegressors(series, 0);
+ tolset();
+ singcheck();
+
+ double[] beta = this.regcf(series.length);
+
+ ss();
+
+ double[] cov = this.cov(series.length);
+
+ int rnk = 0;
+ for (int i = 0; i < this.lindep.length; i++) {
+ if (!this.lindep[i]) {
+ ++rnk;
+ }
+ }
+
+ boolean needsReorder = false;
+ for (int i = 0; i < this.nvars; i++) {
+ if (this.vorder[i] != series[i]) {
+ needsReorder = true;
+ break;
+ }
+ }
+ if (!needsReorder) {
+ return new RegressionResults(
+ beta, new double[][]{cov}, true, this.nobs, rnk,
+ this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
+ } else {
+ double[] betaNew = new double[beta.length];
+ int[] newIndices = new int[beta.length];
+ for (int i = 0; i < series.length; i++) {
+ for (int j = 0; j < this.vorder.length; j++) {
+ if (this.vorder[j] == series[i]) {
+ betaNew[i] = beta[ j];
+ newIndices[i] = j;
+ }
+ }
+ }
+ double[] covNew = new double[cov.length];
+ int idx1 = 0;
+ int idx2;
+ int _i;
+ int _j;
+ for (int i = 0; i < beta.length; i++) {
+ _i = newIndices[i];
+ for (int j = 0; j <= i; j++, idx1++) {
+ _j = newIndices[j];
+ if (_i > _j) {
+ idx2 = _i * (_i + 1) / 2 + _j;
+ } else {
+ idx2 = _j * (_j + 1) / 2 + _i;
+ }
+ covNew[idx1] = cov[idx2];
+ }
+ }
+ return new RegressionResults(
+ betaNew, new double[][]{covNew}, true, this.nobs, rnk,
+ this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/ModelSpecificationException.java b/src/main/java/org/apache/commons/math3/stat/regression/ModelSpecificationException.java
new file mode 100644
index 0000000..f3804db
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/ModelSpecificationException.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.util.Localizable;
+
+/**
+ * Exception thrown when a regression model is not correctly specified.
+ *
+ * @since 3.0
+ */
+public class ModelSpecificationException extends MathIllegalArgumentException {
+ /** Serializable version Id. */
+ private static final long serialVersionUID = 4206514456095401070L;
+
+ /**
+ * @param pattern message pattern describing the specification error.
+ *
+ * @param args arguments.
+ */
+ public ModelSpecificationException(Localizable pattern,
+ Object ... args) {
+ super(pattern, args);
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/MultipleLinearRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/MultipleLinearRegression.java
new file mode 100644
index 0000000..866214f
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/MultipleLinearRegression.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+/**
+ * The multiple linear regression can be represented in matrix-notation.
+ * <pre>
+ * y=X*b+u
+ * </pre>
+ * where y is an <code>n-vector</code> <b>regressand</b>, X is a <code>[n,k]</code> matrix whose <code>k</code> columns are called
+ * <b>regressors</b>, b is <code>k-vector</code> of <b>regression parameters</b> and <code>u</code> is an <code>n-vector</code>
+ * of <b>error terms</b> or <b>residuals</b>.
+ *
+ * The notation is quite standard in literature,
+ * cf eg <a href="http://www.econ.queensu.ca/ETM">Davidson and MacKinnon, Econometrics Theory and Methods, 2004</a>.
+ * @since 2.0
+ */
+public interface MultipleLinearRegression {
+
+ /**
+ * Estimates the regression parameters b.
+ *
+ * @return The [k,1] array representing b
+ */
+ double[] estimateRegressionParameters();
+
+ /**
+ * Estimates the variance of the regression parameters, ie Var(b).
+ *
+ * @return The [k,k] array representing the variance of b
+ */
+ double[][] estimateRegressionParametersVariance();
+
+ /**
+ * Estimates the residuals, ie u = y - X*b.
+ *
+ * @return The [n,1] array representing the residuals
+ */
+ double[] estimateResiduals();
+
+ /**
+ * Returns the variance of the regressand, ie Var(y).
+ *
+ * @return The double representing the variance of y
+ */
+ double estimateRegressandVariance();
+
+ /**
+ * Returns the standard errors of the regression parameters.
+ *
+ * @return standard errors of estimated regression parameters
+ */
+ double[] estimateRegressionParametersStandardErrors();
+
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/OLSMultipleLinearRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/OLSMultipleLinearRegression.java
new file mode 100644
index 0000000..7fff940
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/OLSMultipleLinearRegression.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.LUDecomposition;
+import org.apache.commons.math3.linear.QRDecomposition;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealVector;
+import org.apache.commons.math3.stat.StatUtils;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+
+/**
+ * <p>Implements ordinary least squares (OLS) to estimate the parameters of a
+ * multiple linear regression model.</p>
+ *
+ * <p>The regression coefficients, <code>b</code>, satisfy the normal equations:
+ * <pre><code> X<sup>T</sup> X b = X<sup>T</sup> y </code></pre></p>
+ *
+ * <p>To solve the normal equations, this implementation uses QR decomposition
+ * of the <code>X</code> matrix. (See {@link QRDecomposition} for details on the
+ * decomposition algorithm.) The <code>X</code> matrix, also known as the <i>design matrix,</i>
+ * has rows corresponding to sample observations and columns corresponding to independent
+ * variables. When the model is estimated using an intercept term (i.e. when
+ * {@link #isNoIntercept() isNoIntercept} is false as it is by default), the <code>X</code>
+ * matrix includes an initial column identically equal to 1. We solve the normal equations
+ * as follows:
+ * <pre><code> X<sup>T</sup>X b = X<sup>T</sup> y
+ * (QR)<sup>T</sup> (QR) b = (QR)<sup>T</sup>y
+ * R<sup>T</sup> (Q<sup>T</sup>Q) R b = R<sup>T</sup> Q<sup>T</sup> y
+ * R<sup>T</sup> R b = R<sup>T</sup> Q<sup>T</sup> y
+ * (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> R b = (R<sup>T</sup>)<sup>-1</sup> R<sup>T</sup> Q<sup>T</sup> y
+ * R b = Q<sup>T</sup> y </code></pre></p>
+ *
+ * <p>Given <code>Q</code> and <code>R</code>, the last equation is solved by back-substitution.</p>
+ *
+ * @since 2.0
+ */
+public class OLSMultipleLinearRegression extends AbstractMultipleLinearRegression {
+
+ /** Cached QR decomposition of X matrix */
+ private QRDecomposition qr = null;
+
+ /** Singularity threshold for QR decomposition */
+ private final double threshold;
+
+ /**
+ * Create an empty OLSMultipleLinearRegression instance.
+ */
+ public OLSMultipleLinearRegression() {
+ this(0d);
+ }
+
+ /**
+ * Create an empty OLSMultipleLinearRegression instance, using the given
+ * singularity threshold for the QR decomposition.
+ *
+ * @param threshold the singularity threshold
+ * @since 3.3
+ */
+ public OLSMultipleLinearRegression(final double threshold) {
+ this.threshold = threshold;
+ }
+
+ /**
+ * Loads model x and y sample data, overriding any previous sample.
+ *
+ * Computes and caches QR decomposition of the X matrix.
+ * @param y the [n,1] array representing the y sample
+ * @param x the [n,k] array representing the x sample
+ * @throws MathIllegalArgumentException if the x and y array data are not
+ * compatible for the regression
+ */
+ public void newSampleData(double[] y, double[][] x) throws MathIllegalArgumentException {
+ validateSampleData(x, y);
+ newYSampleData(y);
+ newXSampleData(x);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix.</p>
+ */
+ @Override
+ public void newSampleData(double[] data, int nobs, int nvars) {
+ super.newSampleData(data, nobs, nvars);
+ qr = new QRDecomposition(getX(), threshold);
+ }
+
+ /**
+ * <p>Compute the "hat" matrix.
+ * </p>
+ * <p>The hat matrix is defined in terms of the design matrix X
+ * by X(X<sup>T</sup>X)<sup>-1</sup>X<sup>T</sup>
+ * </p>
+ * <p>The implementation here uses the QR decomposition to compute the
+ * hat matrix as Q I<sub>p</sub>Q<sup>T</sup> where I<sub>p</sub> is the
+ * p-dimensional identity matrix augmented by 0's. This computational
+ * formula is from "The Hat Matrix in Regression and ANOVA",
+ * David C. Hoaglin and Roy E. Welsch,
+ * <i>The American Statistician</i>, Vol. 32, No. 1 (Feb., 1978), pp. 17-22.
+ * </p>
+ * <p>Data for the model must have been successfully loaded using one of
+ * the {@code newSampleData} methods before invoking this method; otherwise
+ * a {@code NullPointerException} will be thrown.</p>
+ *
+ * @return the hat matrix
+ * @throws NullPointerException unless method {@code newSampleData} has been
+ * called beforehand.
+ */
+ public RealMatrix calculateHat() {
+ // Create augmented identity matrix
+ RealMatrix Q = qr.getQ();
+ final int p = qr.getR().getColumnDimension();
+ final int n = Q.getColumnDimension();
+ // No try-catch or advertised NotStrictlyPositiveException - NPE above if n < 3
+ Array2DRowRealMatrix augI = new Array2DRowRealMatrix(n, n);
+ double[][] augIData = augI.getDataRef();
+ for (int i = 0; i < n; i++) {
+ for (int j =0; j < n; j++) {
+ if (i == j && i < p) {
+ augIData[i][j] = 1d;
+ } else {
+ augIData[i][j] = 0d;
+ }
+ }
+ }
+
+ // Compute and return Hat matrix
+ // No DME advertised - args valid if we get here
+ return Q.multiply(augI).multiply(Q.transpose());
+ }
+
+ /**
+ * <p>Returns the sum of squared deviations of Y from its mean.</p>
+ *
+ * <p>If the model has no intercept term, <code>0</code> is used for the
+ * mean of Y - i.e., what is returned is the sum of the squared Y values.</p>
+ *
+ * <p>The value returned by this method is the SSTO value used in
+ * the {@link #calculateRSquared() R-squared} computation.</p>
+ *
+ * @return SSTO - the total sum of squares
+ * @throws NullPointerException if the sample has not been set
+ * @see #isNoIntercept()
+ * @since 2.2
+ */
+ public double calculateTotalSumOfSquares() {
+ if (isNoIntercept()) {
+ return StatUtils.sumSq(getY().toArray());
+ } else {
+ return new SecondMoment().evaluate(getY().toArray());
+ }
+ }
+
+ /**
+ * Returns the sum of squared residuals.
+ *
+ * @return residual sum of squares
+ * @since 2.2
+ * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
+ * @throws NullPointerException if the data for the model have not been loaded
+ */
+ public double calculateResidualSumOfSquares() {
+ final RealVector residuals = calculateResiduals();
+ // No advertised DME, args are valid
+ return residuals.dotProduct(residuals);
+ }
+
+ /**
+ * Returns the R-Squared statistic, defined by the formula <pre>
+ * R<sup>2</sup> = 1 - SSR / SSTO
+ * </pre>
+ * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals}
+ * and SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}
+ *
+ * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
+ *
+ * @return R-square statistic
+ * @throws NullPointerException if the sample has not been set
+ * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
+ * @since 2.2
+ */
+ public double calculateRSquared() {
+ return 1 - calculateResidualSumOfSquares() / calculateTotalSumOfSquares();
+ }
+
+ /**
+ * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
+ * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
+ * </pre>
+ * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
+ * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the number
+ * of observations and p is the number of parameters estimated (including the intercept).</p>
+ *
+ * <p>If the regression is estimated without an intercept term, what is returned is <pre>
+ * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
+ * </pre></p>
+ *
+ * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
+ *
+ * @return adjusted R-Squared statistic
+ * @throws NullPointerException if the sample has not been set
+ * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
+ * @see #isNoIntercept()
+ * @since 2.2
+ */
+ public double calculateAdjustedRSquared() {
+ final double n = getX().getRowDimension();
+ if (isNoIntercept()) {
+ return 1 - (1 - calculateRSquared()) * (n / (n - getX().getColumnDimension()));
+ } else {
+ return 1 - (calculateResidualSumOfSquares() * (n - 1)) /
+ (calculateTotalSumOfSquares() * (n - getX().getColumnDimension()));
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>This implementation computes and caches the QR decomposition of the X matrix
+ * once it is successfully loaded.</p>
+ */
+ @Override
+ protected void newXSampleData(double[][] x) {
+ super.newXSampleData(x);
+ qr = new QRDecomposition(getX(), threshold);
+ }
+
+ /**
+ * Calculates the regression coefficients using OLS.
+ *
+ * <p>Data for the model must have been successfully loaded using one of
+ * the {@code newSampleData} methods before invoking this method; otherwise
+ * a {@code NullPointerException} will be thrown.</p>
+ *
+ * @return beta
+ * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
+ * @throws NullPointerException if the data for the model have not been loaded
+ */
+ @Override
+ protected RealVector calculateBeta() {
+ return qr.getSolver().solve(getY());
+ }
+
+ /**
+ * <p>Calculates the variance-covariance matrix of the regression parameters.
+ * </p>
+ * <p>Var(b) = (X<sup>T</sup>X)<sup>-1</sup>
+ * </p>
+ * <p>Uses QR decomposition to reduce (X<sup>T</sup>X)<sup>-1</sup>
+ * to (R<sup>T</sup>R)<sup>-1</sup>, with only the top p rows of
+ * R included, where p = the length of the beta vector.</p>
+ *
+ * <p>Data for the model must have been successfully loaded using one of
+ * the {@code newSampleData} methods before invoking this method; otherwise
+ * a {@code NullPointerException} will be thrown.</p>
+ *
+ * @return The beta variance-covariance matrix
+ * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
+ * @throws NullPointerException if the data for the model have not been loaded
+ */
+ @Override
+ protected RealMatrix calculateBetaVariance() {
+ int p = getX().getColumnDimension();
+ RealMatrix Raug = qr.getR().getSubMatrix(0, p - 1 , 0, p - 1);
+ RealMatrix Rinv = new LUDecomposition(Raug).getSolver().getInverse();
+ return Rinv.multiply(Rinv.transpose());
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/RegressionResults.java b/src/main/java/org/apache/commons/math3/stat/regression/RegressionResults.java
new file mode 100644
index 0000000..70faeac
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/RegressionResults.java
@@ -0,0 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import org.apache.commons.math3.util.FastMath;
+import org.apache.commons.math3.util.MathArrays;
+import org.apache.commons.math3.exception.OutOfRangeException;
+
+/**
+ * Results of a Multiple Linear Regression model fit.
+ *
+ * @since 3.0
+ */
+public class RegressionResults implements Serializable {
+
+ /** INDEX of Sum of Squared Errors */
+ private static final int SSE_IDX = 0;
+ /** INDEX of Sum of Squares of Model */
+ private static final int SST_IDX = 1;
+ /** INDEX of R-Squared of regression */
+ private static final int RSQ_IDX = 2;
+ /** INDEX of Mean Squared Error */
+ private static final int MSE_IDX = 3;
+ /** INDEX of Adjusted R Squared */
+ private static final int ADJRSQ_IDX = 4;
+ /** UID */
+ private static final long serialVersionUID = 1l;
+ /** regression slope parameters */
+ private final double[] parameters;
+ /** variance covariance matrix of parameters */
+ private final double[][] varCovData;
+ /** boolean flag for variance covariance matrix in symm compressed storage */
+ private final boolean isSymmetricVCD;
+ /** rank of the solution */
+ @SuppressWarnings("unused")
+ private final int rank;
+ /** number of observations on which results are based */
+ private final long nobs;
+ /** boolean flag indicator of whether a constant was included*/
+ private final boolean containsConstant;
+ /** array storing global results, SSE, MSE, RSQ, adjRSQ */
+ private final double[] globalFitInfo;
+
+ /**
+ * Set the default constructor to private access
+ * to prevent inadvertent instantiation
+ */
+ @SuppressWarnings("unused")
+ private RegressionResults() {
+ this.parameters = null;
+ this.varCovData = null;
+ this.rank = -1;
+ this.nobs = -1;
+ this.containsConstant = false;
+ this.isSymmetricVCD = false;
+ this.globalFitInfo = null;
+ }
+
+ /**
+ * Constructor for Regression Results.
+ *
+ * @param parameters a double array with the regression slope estimates
+ * @param varcov the variance covariance matrix, stored either in a square matrix
+ * or as a compressed
+ * @param isSymmetricCompressed a flag which denotes that the variance covariance
+ * matrix is in symmetric compressed format
+ * @param nobs the number of observations of the regression estimation
+ * @param rank the number of independent variables in the regression
+ * @param sumy the sum of the independent variable
+ * @param sumysq the sum of the squared independent variable
+ * @param sse sum of squared errors
+ * @param containsConstant true model has constant, false model does not have constant
+ * @param copyData if true a deep copy of all input data is made, if false only references
+ * are copied and the RegressionResults become mutable
+ */
+ public RegressionResults(
+ final double[] parameters, final double[][] varcov,
+ final boolean isSymmetricCompressed,
+ final long nobs, final int rank,
+ final double sumy, final double sumysq, final double sse,
+ final boolean containsConstant,
+ final boolean copyData) {
+ if (copyData) {
+ this.parameters = MathArrays.copyOf(parameters);
+ this.varCovData = new double[varcov.length][];
+ for (int i = 0; i < varcov.length; i++) {
+ this.varCovData[i] = MathArrays.copyOf(varcov[i]);
+ }
+ } else {
+ this.parameters = parameters;
+ this.varCovData = varcov;
+ }
+ this.isSymmetricVCD = isSymmetricCompressed;
+ this.nobs = nobs;
+ this.rank = rank;
+ this.containsConstant = containsConstant;
+ this.globalFitInfo = new double[5];
+ Arrays.fill(this.globalFitInfo, Double.NaN);
+
+ if (rank > 0) {
+ this.globalFitInfo[SST_IDX] = containsConstant ?
+ (sumysq - sumy * sumy / nobs) : sumysq;
+ }
+
+ this.globalFitInfo[SSE_IDX] = sse;
+ this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
+ (nobs - rank);
+ this.globalFitInfo[RSQ_IDX] = 1.0 -
+ this.globalFitInfo[SSE_IDX] /
+ this.globalFitInfo[SST_IDX];
+
+ if (!containsConstant) {
+ this.globalFitInfo[ADJRSQ_IDX] = 1.0-
+ (1.0 - this.globalFitInfo[RSQ_IDX]) *
+ ( (double) nobs / ( (double) (nobs - rank)));
+ } else {
+ this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
+ (globalFitInfo[SST_IDX] * (nobs - rank));
+ }
+ }
+
+ /**
+ * <p>Returns the parameter estimate for the regressor at the given index.</p>
+ *
+ * <p>A redundant regressor will have its redundancy flag set, as well as
+ * a parameters estimated equal to {@code Double.NaN}</p>
+ *
+ * @param index Index.
+ * @return the parameters estimated for regressor at index.
+ * @throws OutOfRangeException if {@code index} is not in the interval
+ * {@code [0, number of parameters)}.
+ */
+ public double getParameterEstimate(int index) throws OutOfRangeException {
+ if (parameters == null) {
+ return Double.NaN;
+ }
+ if (index < 0 || index >= this.parameters.length) {
+ throw new OutOfRangeException(index, 0, this.parameters.length - 1);
+ }
+ return this.parameters[index];
+ }
+
+ /**
+ * <p>Returns a copy of the regression parameters estimates.</p>
+ *
+ * <p>The parameter estimates are returned in the natural order of the data.</p>
+ *
+ * <p>A redundant regressor will have its redundancy flag set, as will
+ * a parameter estimate equal to {@code Double.NaN}.</p>
+ *
+ * @return array of parameter estimates, null if no estimation occurred
+ */
+ public double[] getParameterEstimates() {
+ if (this.parameters == null) {
+ return null;
+ }
+ return MathArrays.copyOf(parameters);
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+ * error of the parameter estimate at index</a>,
+ * usually denoted s(b<sub>index</sub>).
+ *
+ * @param index Index.
+ * @return the standard errors associated with parameters estimated at index.
+ * @throws OutOfRangeException if {@code index} is not in the interval
+ * {@code [0, number of parameters)}.
+ */
+ public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
+ if (parameters == null) {
+ return Double.NaN;
+ }
+ if (index < 0 || index >= this.parameters.length) {
+ throw new OutOfRangeException(index, 0, this.parameters.length - 1);
+ }
+ double var = this.getVcvElement(index, index);
+ if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
+ return FastMath.sqrt(var);
+ }
+ return Double.NaN;
+ }
+
+ /**
+ * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+ * error of the parameter estimates</a>,
+ * usually denoted s(b<sub>i</sub>).</p>
+ *
+ * <p>If there are problems with an ill conditioned design matrix then the regressor
+ * which is redundant will be assigned <code>Double.NaN</code>. </p>
+ *
+ * @return an array standard errors associated with parameters estimates,
+ * null if no estimation occurred
+ */
+ public double[] getStdErrorOfEstimates() {
+ if (parameters == null) {
+ return null;
+ }
+ double[] se = new double[this.parameters.length];
+ for (int i = 0; i < this.parameters.length; i++) {
+ double var = this.getVcvElement(i, i);
+ if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
+ se[i] = FastMath.sqrt(var);
+ continue;
+ }
+ se[i] = Double.NaN;
+ }
+ return se;
+ }
+
+ /**
+ * <p>Returns the covariance between regression parameters i and j.</p>
+ *
+ * <p>If there are problems with an ill conditioned design matrix then the covariance
+ * which involves redundant columns will be assigned {@code Double.NaN}. </p>
+ *
+ * @param i {@code i}th regression parameter.
+ * @param j {@code j}th regression parameter.
+ * @return the covariance of the parameter estimates.
+ * @throws OutOfRangeException if {@code i} or {@code j} is not in the
+ * interval {@code [0, number of parameters)}.
+ */
+ public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
+ if (parameters == null) {
+ return Double.NaN;
+ }
+ if (i < 0 || i >= this.parameters.length) {
+ throw new OutOfRangeException(i, 0, this.parameters.length - 1);
+ }
+ if (j < 0 || j >= this.parameters.length) {
+ throw new OutOfRangeException(j, 0, this.parameters.length - 1);
+ }
+ return this.getVcvElement(i, j);
+ }
+
+ /**
+ * <p>Returns the number of parameters estimated in the model.</p>
+ *
+ * <p>This is the maximum number of regressors, some techniques may drop
+ * redundant parameters</p>
+ *
+ * @return number of regressors, -1 if not estimated
+ */
+ public int getNumberOfParameters() {
+ if (this.parameters == null) {
+ return -1;
+ }
+ return this.parameters.length;
+ }
+
+ /**
+ * Returns the number of observations added to the regression model.
+ *
+ * @return Number of observations, -1 if an error condition prevents estimation
+ */
+ public long getN() {
+ return this.nobs;
+ }
+
+ /**
+ * <p>Returns the sum of squared deviations of the y values about their mean.</p>
+ *
+ * <p>This is defined as SSTO
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
+ *
+ * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
+ *
+ * @return sum of squared deviations of y values
+ */
+ public double getTotalSumSquares() {
+ return this.globalFitInfo[SST_IDX];
+ }
+
+ /**
+ * <p>Returns the sum of squared deviations of the predicted y values about
+ * their mean (which equals the mean of y).</p>
+ *
+ * <p>This is usually abbreviated SSR or SSM. It is defined as SSM
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared deviations of predicted y values
+ */
+ public double getRegressionSumSquares() {
+ return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
+ }
+
+ /**
+ * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
+ * sum of squared errors</a> (SSE) associated with the regression
+ * model.</p>
+ *
+ * <p>The return value is constrained to be non-negative - i.e., if due to
+ * rounding errors the computational formula returns a negative result,
+ * 0 is returned.</p>
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>numberOfParameters data pairs
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared errors associated with the regression model
+ */
+ public double getErrorSumSquares() {
+ return this.globalFitInfo[ SSE_IDX];
+ }
+
+ /**
+ * <p>Returns the sum of squared errors divided by the degrees of freedom,
+ * usually abbreviated MSE.</p>
+ *
+ * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
+ * or if there is no variation in <code>x</code>, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of y values
+ */
+ public double getMeanSquareError() {
+ return this.globalFitInfo[ MSE_IDX];
+ }
+
+ /**
+ * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
+ * coefficient of multiple determination</a>,
+ * usually denoted r-square.</p>
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, {@code Double,NaN} is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return r-square, a double in the interval [0, 1]
+ */
+ public double getRSquared() {
+ return this.globalFitInfo[ RSQ_IDX];
+ }
+
+ /**
+ * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
+ * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
+ * </pre>
+ * where SSR is the sum of squared residuals},
+ * SSTO is the total sum of squares}, n is the number
+ * of observations and p is the number of parameters estimated (including the intercept).</p>
+ *
+ * <p>If the regression is estimated without an intercept term, what is returned is <pre>
+ * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
+ * </pre></p>
+ *
+ * @return adjusted R-Squared statistic
+ */
+ public double getAdjustedRSquared() {
+ return this.globalFitInfo[ ADJRSQ_IDX];
+ }
+
+ /**
+ * Returns true if the regression model has been computed including an intercept.
+ * In this case, the coefficient of the intercept is the first element of the
+ * {@link #getParameterEstimates() parameter estimates}.
+ * @return true if the model has an intercept term
+ */
+ public boolean hasIntercept() {
+ return this.containsConstant;
+ }
+
+ /**
+ * Gets the i-jth element of the variance-covariance matrix.
+ *
+ * @param i first variable index
+ * @param j second variable index
+ * @return the requested variance-covariance matrix entry
+ */
+ private double getVcvElement(int i, int j) {
+ if (this.isSymmetricVCD) {
+ if (this.varCovData.length > 1) {
+ //could be stored in upper or lower triangular
+ if (i == j) {
+ return varCovData[i][i];
+ } else if (i >= varCovData[j].length) {
+ return varCovData[i][j];
+ } else {
+ return varCovData[j][i];
+ }
+ } else {//could be in single array
+ if (i > j) {
+ return varCovData[0][(i + 1) * i / 2 + j];
+ } else {
+ return varCovData[0][(j + 1) * j / 2 + i];
+ }
+ }
+ } else {
+ return this.varCovData[i][j];
+ }
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java
new file mode 100644
index 0000000..02bf8f4
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java
@@ -0,0 +1,881 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.math3.stat.regression;
+import java.io.Serializable;
+
+import org.apache.commons.math3.distribution.TDistribution;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NoDataException;
+import org.apache.commons.math3.exception.OutOfRangeException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.util.FastMath;
+import org.apache.commons.math3.util.Precision;
+
+/**
+ * Estimates an ordinary least squares regression model
+ * with one independent variable.
+ * <p>
+ * <code> y = intercept + slope * x </code></p>
+ * <p>
+ * Standard errors for <code>intercept</code> and <code>slope</code> are
+ * available as well as ANOVA, r-square and Pearson's r statistics.</p>
+ * <p>
+ * Observations (x,y pairs) can be added to the model one at a time or they
+ * can be provided in a 2-dimensional array. The observations are not stored
+ * in memory, so there is no limit to the number of observations that can be
+ * added to the model.</p>
+ * <p>
+ * <strong>Usage Notes</strong>: <ul>
+ * <li> When there are fewer than two observations in the model, or when
+ * there is no variation in the x values (i.e. all x values are the same)
+ * all statistics return <code>NaN</code>. At least two observations with
+ * different x coordinates are required to estimate a bivariate regression
+ * model.
+ * </li>
+ * <li> Getters for the statistics always compute values based on the current
+ * set of observations -- i.e., you can get statistics, then add more data
+ * and get updated statistics without using a new instance. There is no
+ * "compute" method that updates all statistics. Each of the getters performs
+ * the necessary computations to return the requested statistic.
+ * </li>
+ * <li> The intercept term may be suppressed by passing {@code false} to
+ * the {@link #SimpleRegression(boolean)} constructor. When the
+ * {@code hasIntercept} property is false, the model is estimated without a
+ * constant term and {@link #getIntercept()} returns {@code 0}.</li>
+ * </ul></p>
+ *
+ */
+public class SimpleRegression implements Serializable, UpdatingMultipleLinearRegression {
+
+ /** Serializable version identifier */
+ private static final long serialVersionUID = -3004689053607543335L;
+
+ /** sum of x values */
+ private double sumX = 0d;
+
+ /** total variation in x (sum of squared deviations from xbar) */
+ private double sumXX = 0d;
+
+ /** sum of y values */
+ private double sumY = 0d;
+
+ /** total variation in y (sum of squared deviations from ybar) */
+ private double sumYY = 0d;
+
+ /** sum of products */
+ private double sumXY = 0d;
+
+ /** number of observations */
+ private long n = 0;
+
+ /** mean of accumulated x values, used in updating formulas */
+ private double xbar = 0;
+
+ /** mean of accumulated y values, used in updating formulas */
+ private double ybar = 0;
+
+ /** include an intercept or not */
+ private final boolean hasIntercept;
+ // ---------------------Public methods--------------------------------------
+
+ /**
+ * Create an empty SimpleRegression instance
+ */
+ public SimpleRegression() {
+ this(true);
+ }
+ /**
+ * Create a SimpleRegression instance, specifying whether or not to estimate
+ * an intercept.
+ *
+ * <p>Use {@code false} to estimate a model with no intercept. When the
+ * {@code hasIntercept} property is false, the model is estimated without a
+ * constant term and {@link #getIntercept()} returns {@code 0}.</p>
+ *
+ * @param includeIntercept whether or not to include an intercept term in
+ * the regression model
+ */
+ public SimpleRegression(boolean includeIntercept) {
+ super();
+ hasIntercept = includeIntercept;
+ }
+
+ /**
+ * Adds the observation (x,y) to the regression data set.
+ * <p>
+ * Uses updating formulas for means and sums of squares defined in
+ * "Algorithms for Computing the Sample Variance: Analysis and
+ * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
+ * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
+ * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.</p>
+ *
+ *
+ * @param x independent variable value
+ * @param y dependent variable value
+ */
+ public void addData(final double x,final double y) {
+ if (n == 0) {
+ xbar = x;
+ ybar = y;
+ } else {
+ if( hasIntercept ){
+ final double fact1 = 1.0 + n;
+ final double fact2 = n / (1.0 + n);
+ final double dx = x - xbar;
+ final double dy = y - ybar;
+ sumXX += dx * dx * fact2;
+ sumYY += dy * dy * fact2;
+ sumXY += dx * dy * fact2;
+ xbar += dx / fact1;
+ ybar += dy / fact1;
+ }
+ }
+ if( !hasIntercept ){
+ sumXX += x * x ;
+ sumYY += y * y ;
+ sumXY += x * y ;
+ }
+ sumX += x;
+ sumY += y;
+ n++;
+ }
+
+ /**
+ * Appends data from another regression calculation to this one.
+ *
+ * <p>The mean update formulae are based on a paper written by Philippe
+ * P&eacute;bay:
+ * <a
+ * href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
+ * Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report
+ * SAND2008-6212, Sandia National Laboratories.</p>
+ *
+ * @param reg model to append data from
+ * @since 3.3
+ */
+ public void append(SimpleRegression reg) {
+ if (n == 0) {
+ xbar = reg.xbar;
+ ybar = reg.ybar;
+ sumXX = reg.sumXX;
+ sumYY = reg.sumYY;
+ sumXY = reg.sumXY;
+ } else {
+ if (hasIntercept) {
+ final double fact1 = reg.n / (double) (reg.n + n);
+ final double fact2 = n * reg.n / (double) (reg.n + n);
+ final double dx = reg.xbar - xbar;
+ final double dy = reg.ybar - ybar;
+ sumXX += reg.sumXX + dx * dx * fact2;
+ sumYY += reg.sumYY + dy * dy * fact2;
+ sumXY += reg.sumXY + dx * dy * fact2;
+ xbar += dx * fact1;
+ ybar += dy * fact1;
+ }else{
+ sumXX += reg.sumXX;
+ sumYY += reg.sumYY;
+ sumXY += reg.sumXY;
+ }
+ }
+ sumX += reg.sumX;
+ sumY += reg.sumY;
+ n += reg.n;
+ }
+
+ /**
+ * Removes the observation (x,y) from the regression data set.
+ * <p>
+ * Mirrors the addData method. This method permits the use of
+ * SimpleRegression instances in streaming mode where the regression
+ * is applied to a sliding "window" of observations, however the caller is
+ * responsible for maintaining the set of observations in the window.</p>
+ *
+ * The method has no effect if there are no points of data (i.e. n=0)
+ *
+ * @param x independent variable value
+ * @param y dependent variable value
+ */
+ public void removeData(final double x,final double y) {
+ if (n > 0) {
+ if (hasIntercept) {
+ final double fact1 = n - 1.0;
+ final double fact2 = n / (n - 1.0);
+ final double dx = x - xbar;
+ final double dy = y - ybar;
+ sumXX -= dx * dx * fact2;
+ sumYY -= dy * dy * fact2;
+ sumXY -= dx * dy * fact2;
+ xbar -= dx / fact1;
+ ybar -= dy / fact1;
+ } else {
+ final double fact1 = n - 1.0;
+ sumXX -= x * x;
+ sumYY -= y * y;
+ sumXY -= x * y;
+ xbar -= x / fact1;
+ ybar -= y / fact1;
+ }
+ sumX -= x;
+ sumY -= y;
+ n--;
+ }
+ }
+
+ /**
+ * Adds the observations represented by the elements in
+ * <code>data</code>.
+ * <p>
+ * <code>(data[0][0],data[0][1])</code> will be the first observation, then
+ * <code>(data[1][0],data[1][1])</code>, etc.</p>
+ * <p>
+ * This method does not replace data that has already been added. The
+ * observations represented by <code>data</code> are added to the existing
+ * dataset.</p>
+ * <p>
+ * To replace all data, use <code>clear()</code> before adding the new
+ * data.</p>
+ *
+ * @param data array of observations to be added
+ * @throws ModelSpecificationException if the length of {@code data[i]} is not
+ * greater than or equal to 2
+ */
+ public void addData(final double[][] data) throws ModelSpecificationException {
+ for (int i = 0; i < data.length; i++) {
+ if( data[i].length < 2 ){
+ throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
+ data[i].length, 2);
+ }
+ addData(data[i][0], data[i][1]);
+ }
+ }
+
+ /**
+ * Adds one observation to the regression model.
+ *
+ * @param x the independent variables which form the design matrix
+ * @param y the dependent or response variable
+ * @throws ModelSpecificationException if the length of {@code x} does not equal
+ * the number of independent variables in the model
+ */
+ public void addObservation(final double[] x,final double y)
+ throws ModelSpecificationException {
+ if( x == null || x.length == 0 ){
+ throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,x!=null?x.length:0, 1);
+ }
+ addData( x[0], y );
+ }
+
+ /**
+ * Adds a series of observations to the regression model. The lengths of
+ * x and y must be the same and x must be rectangular.
+ *
+ * @param x a series of observations on the independent variables
+ * @param y a series of observations on the dependent variable
+ * The length of x and y must be the same
+ * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
+ * the length of {@code y} or does not contain sufficient data to estimate the model
+ */
+ public void addObservations(final double[][] x,final double[] y) throws ModelSpecificationException {
+ if ((x == null) || (y == null) || (x.length != y.length)) {
+ throw new ModelSpecificationException(
+ LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
+ (x == null) ? 0 : x.length,
+ (y == null) ? 0 : y.length);
+ }
+ boolean obsOk=true;
+ for( int i = 0 ; i < x.length; i++){
+ if( x[i] == null || x[i].length == 0 ){
+ obsOk = false;
+ }
+ }
+ if( !obsOk ){
+ throw new ModelSpecificationException(
+ LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
+ 0, 1);
+ }
+ for( int i = 0 ; i < x.length ; i++){
+ addData( x[i][0], y[i] );
+ }
+ }
+
+ /**
+ * Removes observations represented by the elements in <code>data</code>.
+ * <p>
+ * If the array is larger than the current n, only the first n elements are
+ * processed. This method permits the use of SimpleRegression instances in
+ * streaming mode where the regression is applied to a sliding "window" of
+ * observations, however the caller is responsible for maintaining the set
+ * of observations in the window.</p>
+ * <p>
+ * To remove all data, use <code>clear()</code>.</p>
+ *
+ * @param data array of observations to be removed
+ */
+ public void removeData(double[][] data) {
+ for (int i = 0; i < data.length && n > 0; i++) {
+ removeData(data[i][0], data[i][1]);
+ }
+ }
+
+ /**
+ * Clears all data from the model.
+ */
+ public void clear() {
+ sumX = 0d;
+ sumXX = 0d;
+ sumY = 0d;
+ sumYY = 0d;
+ sumXY = 0d;
+ n = 0;
+ }
+
+ /**
+ * Returns the number of observations that have been added to the model.
+ *
+ * @return n number of observations that have been added.
+ */
+ public long getN() {
+ return n;
+ }
+
+ /**
+ * Returns the "predicted" <code>y</code> value associated with the
+ * supplied <code>x</code> value, based on the data that has been
+ * added to the model when this method is activated.
+ * <p>
+ * <code> predict(x) = intercept + slope * x </code></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @param x input <code>x</code> value
+ * @return predicted <code>y</code> value
+ */
+ public double predict(final double x) {
+ final double b1 = getSlope();
+ if (hasIntercept) {
+ return getIntercept(b1) + b1 * x;
+ }
+ return b1 * x;
+ }
+
+ /**
+ * Returns the intercept of the estimated regression line, if
+ * {@link #hasIntercept()} is true; otherwise 0.
+ * <p>
+ * The least squares estimate of the intercept is computed using the
+ * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
+ * The intercept is sometimes denoted b0.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return the intercept of the regression line if the model includes an
+ * intercept; 0 otherwise
+ * @see #SimpleRegression(boolean)
+ */
+ public double getIntercept() {
+ return hasIntercept ? getIntercept(getSlope()) : 0.0;
+ }
+
+ /**
+ * Returns true if the model includes an intercept term.
+ *
+ * @return true if the regression includes an intercept; false otherwise
+ * @see #SimpleRegression(boolean)
+ */
+ public boolean hasIntercept() {
+ return hasIntercept;
+ }
+
+ /**
+ * Returns the slope of the estimated regression line.
+ * <p>
+ * The least squares estimate of the slope is computed using the
+ * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
+ * The slope is sometimes denoted b1.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return the slope of the regression line
+ */
+ public double getSlope() {
+ if (n < 2) {
+ return Double.NaN; //not enough data
+ }
+ if (FastMath.abs(sumXX) < 10 * Double.MIN_VALUE) {
+ return Double.NaN; //not enough variation in x
+ }
+ return sumXY / sumXX;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
+ * sum of squared errors</a> (SSE) associated with the regression
+ * model.
+ * <p>
+ * The sum is computed using the computational formula</p>
+ * <p>
+ * <code>SSE = SYY - (SXY * SXY / SXX)</code></p>
+ * <p>
+ * where <code>SYY</code> is the sum of the squared deviations of the y
+ * values about their mean, <code>SXX</code> is similarly defined and
+ * <code>SXY</code> is the sum of the products of x and y mean deviations.
+ * </p><p>
+ * The sums are accumulated using the updating algorithm referenced in
+ * {@link #addData}.</p>
+ * <p>
+ * The return value is constrained to be non-negative - i.e., if due to
+ * rounding errors the computational formula returns a negative result,
+ * 0 is returned.</p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared errors associated with the regression model
+ */
+ public double getSumSquaredErrors() {
+ return FastMath.max(0d, sumYY - sumXY * sumXY / sumXX);
+ }
+
+ /**
+ * Returns the sum of squared deviations of the y values about their mean.
+ * <p>
+ * This is defined as SSTO
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
+ * <p>
+ * If <code>n < 2</code>, this returns <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of y values
+ */
+ public double getTotalSumSquares() {
+ if (n < 2) {
+ return Double.NaN;
+ }
+ return sumYY;
+ }
+
+ /**
+ * Returns the sum of squared deviations of the x values about their mean.
+ *
+ * If <code>n < 2</code>, this returns <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of x values
+ */
+ public double getXSumSquares() {
+ if (n < 2) {
+ return Double.NaN;
+ }
+ return sumXX;
+ }
+
+ /**
+ * Returns the sum of crossproducts, x<sub>i</sub>*y<sub>i</sub>.
+ *
+ * @return sum of cross products
+ */
+ public double getSumOfCrossProducts() {
+ return sumXY;
+ }
+
+ /**
+ * Returns the sum of squared deviations of the predicted y values about
+ * their mean (which equals the mean of y).
+ * <p>
+ * This is usually abbreviated SSR or SSM. It is defined as SSM
+ * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double.NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return sum of squared deviations of predicted y values
+ */
+ public double getRegressionSumSquares() {
+ return getRegressionSumSquares(getSlope());
+ }
+
+ /**
+ * Returns the sum of squared errors divided by the degrees of freedom,
+ * usually abbreviated MSE.
+ * <p>
+ * If there are fewer than <strong>three</strong> data pairs in the model,
+ * or if there is no variation in <code>x</code>, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return sum of squared deviations of y values
+ */
+ public double getMeanSquareError() {
+ if (n < 3) {
+ return Double.NaN;
+ }
+ return hasIntercept ? (getSumSquaredErrors() / (n - 2)) : (getSumSquaredErrors() / (n - 1));
+ }
+
+ /**
+ * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
+ * Pearson's product moment correlation coefficient</a>,
+ * usually denoted r.
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return Pearson's r
+ */
+ public double getR() {
+ double b1 = getSlope();
+ double result = FastMath.sqrt(getRSquare());
+ if (b1 < 0) {
+ result = -result;
+ }
+ return result;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
+ * coefficient of determination</a>,
+ * usually denoted r-square.
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>At least two observations (with at least two different x values)
+ * must have been added before invoking this method. If this method is
+ * invoked before a model can be estimated, <code>Double,NaN</code> is
+ * returned.
+ * </li></ul></p>
+ *
+ * @return r-square
+ */
+ public double getRSquare() {
+ double ssto = getTotalSumSquares();
+ return (ssto - getSumSquaredErrors()) / ssto;
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
+ * standard error of the intercept estimate</a>,
+ * usually denoted s(b0).
+ * <p>
+ * If there are fewer that <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p> Additionally, a <code>Double.NaN</code> is
+ * returned when the intercept is constrained to be zero
+ *
+ * @return standard error associated with intercept estimate
+ */
+ public double getInterceptStdErr() {
+ if( !hasIntercept ){
+ return Double.NaN;
+ }
+ return FastMath.sqrt(
+ getMeanSquareError() * ((1d / n) + (xbar * xbar) / sumXX));
+ }
+
+ /**
+ * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
+ * error of the slope estimate</a>,
+ * usually denoted s(b1).
+ * <p>
+ * If there are fewer that <strong>three</strong> data pairs in the model,
+ * or if there is no variation in x, this returns <code>Double.NaN</code>.
+ * </p>
+ *
+ * @return standard error associated with slope estimate
+ */
+ public double getSlopeStdErr() {
+ return FastMath.sqrt(getMeanSquareError() / sumXX);
+ }
+
+ /**
+ * Returns the half-width of a 95% confidence interval for the slope
+ * estimate.
+ * <p>
+ * The 95% confidence interval is</p>
+ * <p>
+ * <code>(getSlope() - getSlopeConfidenceInterval(),
+ * getSlope() + getSlopeConfidenceInterval())</code></p>
+ * <p>
+ * If there are fewer that <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p>
+ * <p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ *
+ * @return half-width of 95% confidence interval for the slope estimate
+ * @throws OutOfRangeException if the confidence interval can not be computed.
+ */
+ public double getSlopeConfidenceInterval() throws OutOfRangeException {
+ return getSlopeConfidenceInterval(0.05d);
+ }
+
+ /**
+ * Returns the half-width of a (100-100*alpha)% confidence interval for
+ * the slope estimate.
+ * <p>
+ * The (100-100*alpha)% confidence interval is </p>
+ * <p>
+ * <code>(getSlope() - getSlopeConfidenceInterval(),
+ * getSlope() + getSlopeConfidenceInterval())</code></p>
+ * <p>
+ * To request, for example, a 99% confidence interval, use
+ * <code>alpha = .01</code></p>
+ * <p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ * <p>
+ * <strong> Preconditions:</strong><ul>
+ * <li>If there are fewer that <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.
+ * </li>
+ * <li><code>(0 < alpha < 1)</code>; otherwise an
+ * <code>OutOfRangeException</code> is thrown.
+ * </li></ul></p>
+ *
+ * @param alpha the desired significance level
+ * @return half-width of 95% confidence interval for the slope estimate
+ * @throws OutOfRangeException if the confidence interval can not be computed.
+ */
+ public double getSlopeConfidenceInterval(final double alpha)
+ throws OutOfRangeException {
+ if (n < 3) {
+ return Double.NaN;
+ }
+ if (alpha >= 1 || alpha <= 0) {
+ throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
+ alpha, 0, 1);
+ }
+ // No advertised NotStrictlyPositiveException here - will return NaN above
+ TDistribution distribution = new TDistribution(n - 2);
+ return getSlopeStdErr() *
+ distribution.inverseCumulativeProbability(1d - alpha / 2d);
+ }
+
+ /**
+ * Returns the significance level of the slope (equiv) correlation.
+ * <p>
+ * Specifically, the returned value is the smallest <code>alpha</code>
+ * such that the slope confidence interval with significance level
+ * equal to <code>alpha</code> does not include <code>0</code>.
+ * On regression output, this is often denoted <code>Prob(|t| > 0)</code>
+ * </p><p>
+ * <strong>Usage Note</strong>:<br>
+ * The validity of this statistic depends on the assumption that the
+ * observations included in the model are drawn from a
+ * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
+ * Bivariate Normal Distribution</a>.</p>
+ * <p>
+ * If there are fewer that <strong>three</strong> observations in the
+ * model, or if there is no variation in x, this returns
+ * <code>Double.NaN</code>.</p>
+ *
+ * @return significance level for slope/correlation
+ * @throws org.apache.commons.math3.exception.MaxCountExceededException
+ * if the significance level can not be computed.
+ */
+ public double getSignificance() {
+ if (n < 3) {
+ return Double.NaN;
+ }
+ // No advertised NotStrictlyPositiveException here - will return NaN above
+ TDistribution distribution = new TDistribution(n - 2);
+ return 2d * (1.0 - distribution.cumulativeProbability(
+ FastMath.abs(getSlope()) / getSlopeStdErr()));
+ }
+
+ // ---------------------Private methods-----------------------------------
+
+ /**
+ * Returns the intercept of the estimated regression line, given the slope.
+ * <p>
+ * Will return <code>NaN</code> if slope is <code>NaN</code>.</p>
+ *
+ * @param slope current slope
+ * @return the intercept of the regression line
+ */
+ private double getIntercept(final double slope) {
+ if( hasIntercept){
+ return (sumY - slope * sumX) / n;
+ }
+ return 0.0;
+ }
+
+ /**
+ * Computes SSR from b1.
+ *
+ * @param slope regression slope estimate
+ * @return sum of squared deviations of predicted y values
+ */
+ private double getRegressionSumSquares(final double slope) {
+ return slope * slope * sumXX;
+ }
+
+ /**
+ * Performs a regression on data present in buffers and outputs a RegressionResults object.
+ *
+ * <p>If there are fewer than 3 observations in the model and {@code hasIntercept} is true
+ * a {@code NoDataException} is thrown. If there is no intercept term, the model must
+ * contain at least 2 observations.</p>
+ *
+ * @return RegressionResults acts as a container of regression output
+ * @throws ModelSpecificationException if the model is not correctly specified
+ * @throws NoDataException if there is not sufficient data in the model to
+ * estimate the regression parameters
+ */
+ public RegressionResults regress() throws ModelSpecificationException, NoDataException {
+ if (hasIntercept) {
+ if (n < 3) {
+ throw new NoDataException(LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION);
+ }
+ if (FastMath.abs(sumXX) > Precision.SAFE_MIN) {
+ final double[] params = new double[] { getIntercept(), getSlope() };
+ final double mse = getMeanSquareError();
+ final double _syy = sumYY + sumY * sumY / n;
+ final double[] vcv = new double[] { mse * (xbar * xbar / sumXX + 1.0 / n), -xbar * mse / sumXX, mse / sumXX };
+ return new RegressionResults(params, new double[][] { vcv }, true, n, 2, sumY, _syy, getSumSquaredErrors(), true,
+ false);
+ } else {
+ final double[] params = new double[] { sumY / n, Double.NaN };
+ // final double mse = getMeanSquareError();
+ final double[] vcv = new double[] { ybar / (n - 1.0), Double.NaN, Double.NaN };
+ return new RegressionResults(params, new double[][] { vcv }, true, n, 1, sumY, sumYY, getSumSquaredErrors(), true,
+ false);
+ }
+ } else {
+ if (n < 2) {
+ throw new NoDataException(LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION);
+ }
+ if (!Double.isNaN(sumXX)) {
+ final double[] vcv = new double[] { getMeanSquareError() / sumXX };
+ final double[] params = new double[] { sumXY / sumXX };
+ return new RegressionResults(params, new double[][] { vcv }, true, n, 1, sumY, sumYY, getSumSquaredErrors(), false,
+ false);
+ } else {
+ final double[] vcv = new double[] { Double.NaN };
+ final double[] params = new double[] { Double.NaN };
+ return new RegressionResults(params, new double[][] { vcv }, true, n, 1, Double.NaN, Double.NaN, Double.NaN, false,
+ false);
+ }
+ }
+ }
+
+ /**
+ * Performs a regression on data present in buffers including only regressors
+ * indexed in variablesToInclude and outputs a RegressionResults object
+ * @param variablesToInclude an array of indices of regressors to include
+ * @return RegressionResults acts as a container of regression output
+ * @throws MathIllegalArgumentException if the variablesToInclude array is null or zero length
+ * @throws OutOfRangeException if a requested variable is not present in model
+ */
+ public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException{
+ if( variablesToInclude == null || variablesToInclude.length == 0){
+ throw new MathIllegalArgumentException(LocalizedFormats.ARRAY_ZERO_LENGTH_OR_NULL_NOT_ALLOWED);
+ }
+ if( variablesToInclude.length > 2 || (variablesToInclude.length > 1 && !hasIntercept) ){
+ throw new ModelSpecificationException(
+ LocalizedFormats.ARRAY_SIZE_EXCEEDS_MAX_VARIABLES,
+ (variablesToInclude.length > 1 && !hasIntercept) ? 1 : 2);
+ }
+
+ if( hasIntercept ){
+ if( variablesToInclude.length == 2 ){
+ if( variablesToInclude[0] == 1 ){
+ throw new ModelSpecificationException(LocalizedFormats.NOT_INCREASING_SEQUENCE);
+ }else if( variablesToInclude[0] != 0 ){
+ throw new OutOfRangeException( variablesToInclude[0], 0,1 );
+ }
+ if( variablesToInclude[1] != 1){
+ throw new OutOfRangeException( variablesToInclude[0], 0,1 );
+ }
+ return regress();
+ }else{
+ if( variablesToInclude[0] != 1 && variablesToInclude[0] != 0 ){
+ throw new OutOfRangeException( variablesToInclude[0],0,1 );
+ }
+ final double _mean = sumY * sumY / n;
+ final double _syy = sumYY + _mean;
+ if( variablesToInclude[0] == 0 ){
+ //just the mean
+ final double[] vcv = new double[]{ sumYY/(((n-1)*n)) };
+ final double[] params = new double[]{ ybar };
+ return new RegressionResults(
+ params, new double[][]{vcv}, true, n, 1,
+ sumY, _syy+_mean, sumYY,true,false);
+
+ }else if( variablesToInclude[0] == 1){
+ //final double _syy = sumYY + sumY * sumY / ((double) n);
+ final double _sxx = sumXX + sumX * sumX / n;
+ final double _sxy = sumXY + sumX * sumY / n;
+ final double _sse = FastMath.max(0d, _syy - _sxy * _sxy / _sxx);
+ final double _mse = _sse/((n-1));
+ if( !Double.isNaN(_sxx) ){
+ final double[] vcv = new double[]{ _mse / _sxx };
+ final double[] params = new double[]{ _sxy/_sxx };
+ return new RegressionResults(
+ params, new double[][]{vcv}, true, n, 1,
+ sumY, _syy, _sse,false,false);
+ }else{
+ final double[] vcv = new double[]{Double.NaN };
+ final double[] params = new double[]{ Double.NaN };
+ return new RegressionResults(
+ params, new double[][]{vcv}, true, n, 1,
+ Double.NaN, Double.NaN, Double.NaN,false,false);
+ }
+ }
+ }
+ }else{
+ if( variablesToInclude[0] != 0 ){
+ throw new OutOfRangeException(variablesToInclude[0],0,0);
+ }
+ return regress();
+ }
+
+ return null;
+ }
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/UpdatingMultipleLinearRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/UpdatingMultipleLinearRegression.java
new file mode 100644
index 0000000..ebefc31
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/UpdatingMultipleLinearRegression.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.regression;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NoDataException;
+
+/**
+ * An interface for regression models allowing for dynamic updating of the data.
+ * That is, the entire data set need not be loaded into memory. As observations
+ * become available, they can be added to the regression model and an updated
+ * estimate regression statistics can be calculated.
+ *
+ * @since 3.0
+ */
+public interface UpdatingMultipleLinearRegression {
+
+ /**
+ * Returns true if a constant has been included false otherwise.
+ *
+ * @return true if constant exists, false otherwise
+ */
+ boolean hasIntercept();
+
+ /**
+ * Returns the number of observations added to the regression model.
+ *
+ * @return Number of observations
+ */
+ long getN();
+
+ /**
+ * Adds one observation to the regression model.
+ *
+ * @param x the independent variables which form the design matrix
+ * @param y the dependent or response variable
+ * @throws ModelSpecificationException if the length of {@code x} does not equal
+ * the number of independent variables in the model
+ */
+ void addObservation(double[] x, double y) throws ModelSpecificationException;
+
+ /**
+ * Adds a series of observations to the regression model. The lengths of
+ * x and y must be the same and x must be rectangular.
+ *
+ * @param x a series of observations on the independent variables
+ * @param y a series of observations on the dependent variable
+ * The length of x and y must be the same
+ * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
+ * the length of {@code y} or does not contain sufficient data to estimate the model
+ */
+ void addObservations(double[][] x, double[] y) throws ModelSpecificationException;
+
+ /**
+ * Clears internal buffers and resets the regression model. This means all
+ * data and derived values are initialized
+ */
+ void clear();
+
+
+ /**
+ * Performs a regression on data present in buffers and outputs a RegressionResults object
+ * @return RegressionResults acts as a container of regression output
+ * @throws ModelSpecificationException if the model is not correctly specified
+ * @throws NoDataException if there is not sufficient data in the model to
+ * estimate the regression parameters
+ */
+ RegressionResults regress() throws ModelSpecificationException, NoDataException;
+
+ /**
+ * Performs a regression on data present in buffers including only regressors
+ * indexed in variablesToInclude and outputs a RegressionResults object
+ * @param variablesToInclude an array of indices of regressors to include
+ * @return RegressionResults acts as a container of regression output
+ * @throws ModelSpecificationException if the model is not correctly specified
+ * @throws MathIllegalArgumentException if the variablesToInclude array is null or zero length
+ */
+ RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException, MathIllegalArgumentException;
+}
diff --git a/src/main/java/org/apache/commons/math3/stat/regression/package-info.java b/src/main/java/org/apache/commons/math3/stat/regression/package-info.java
new file mode 100644
index 0000000..fbc0e12
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/stat/regression/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ *
+ * Statistical routines involving multivariate data.
+ *
+ */
+package org.apache.commons.math3.stat.regression;