diff options
Diffstat (limited to 'src/main/java/org/apache/commons/math3/optim')
74 files changed, 14226 insertions, 0 deletions
diff --git a/src/main/java/org/apache/commons/math3/optim/AbstractConvergenceChecker.java b/src/main/java/org/apache/commons/math3/optim/AbstractConvergenceChecker.java new file mode 100644 index 0000000..19c3f62 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/AbstractConvergenceChecker.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +/** + * Base class for all convergence checker implementations. + * + * @param <PAIR> Type of (point, value) pair. + * @since 3.0 + */ +public abstract class AbstractConvergenceChecker<PAIR> implements ConvergenceChecker<PAIR> { + /** Relative tolerance threshold. */ + private final double relativeThreshold; + + /** Absolute tolerance threshold. */ + private final double absoluteThreshold; + + /** + * Build an instance with a specified thresholds. 
+ * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + */ + public AbstractConvergenceChecker( + final double relativeThreshold, final double absoluteThreshold) { + this.relativeThreshold = relativeThreshold; + this.absoluteThreshold = absoluteThreshold; + } + + /** + * @return the relative threshold. + */ + public double getRelativeThreshold() { + return relativeThreshold; + } + + /** + * @return the absolute threshold. + */ + public double getAbsoluteThreshold() { + return absoluteThreshold; + } + + /** {@inheritDoc} */ + public abstract boolean converged(int iteration, PAIR previous, PAIR current); +} diff --git a/src/main/java/org/apache/commons/math3/optim/AbstractOptimizationProblem.java b/src/main/java/org/apache/commons/math3/optim/AbstractOptimizationProblem.java new file mode 100644 index 0000000..84da354 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/AbstractOptimizationProblem.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.TooManyIterationsException; +import org.apache.commons.math3.util.Incrementor; + +/** + * Base class for implementing optimization problems. It contains the boiler-plate code for counting + * the number of evaluations of the objective function and the number of iterations of the + * algorithm, and storing the convergence checker. + * + * @param <PAIR> Type of the point/value pair returned by the optimization algorithm. + * @since 3.3 + */ +public abstract class AbstractOptimizationProblem<PAIR> implements OptimizationProblem<PAIR> { + + /** Callback to use for the evaluation counter. */ + private static final MaxEvalCallback MAX_EVAL_CALLBACK = new MaxEvalCallback(); + + /** Callback to use for the iteration counter. */ + private static final MaxIterCallback MAX_ITER_CALLBACK = new MaxIterCallback(); + + /** max evaluations */ + private final int maxEvaluations; + + /** max iterations */ + private final int maxIterations; + + /** Convergence checker. */ + private final ConvergenceChecker<PAIR> checker; + + /** + * Create an {@link AbstractOptimizationProblem} from the given data. + * + * @param maxEvaluations the number of allowed model function evaluations. + * @param maxIterations the number of allowed iterations. + * @param checker the convergence checker. 
+ */ + protected AbstractOptimizationProblem( + final int maxEvaluations, + final int maxIterations, + final ConvergenceChecker<PAIR> checker) { + this.maxEvaluations = maxEvaluations; + this.maxIterations = maxIterations; + this.checker = checker; + } + + /** {@inheritDoc} */ + public Incrementor getEvaluationCounter() { + return new Incrementor(this.maxEvaluations, MAX_EVAL_CALLBACK); + } + + /** {@inheritDoc} */ + public Incrementor getIterationCounter() { + return new Incrementor(this.maxIterations, MAX_ITER_CALLBACK); + } + + /** {@inheritDoc} */ + public ConvergenceChecker<PAIR> getConvergenceChecker() { + return checker; + } + + /** Defines the action to perform when reaching the maximum number of evaluations. */ + private static class MaxEvalCallback implements Incrementor.MaxCountExceededCallback { + /** + * {@inheritDoc} + * + * @throws TooManyEvaluationsException + */ + public void trigger(int max) { + throw new TooManyEvaluationsException(max); + } + } + + /** Defines the action to perform when reaching the maximum number of iterations. */ + private static class MaxIterCallback implements Incrementor.MaxCountExceededCallback { + /** + * {@inheritDoc} + * + * @throws TooManyIterationsException + */ + public void trigger(int max) { + throw new TooManyIterationsException(max); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/BaseMultiStartMultivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/BaseMultiStartMultivariateOptimizer.java new file mode 100644 index 0000000..ede4e59 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/BaseMultiStartMultivariateOptimizer.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.random.RandomVectorGenerator; + +/** + * Base class multi-start optimizer for a multivariate function. <br> + * This class wraps an optimizer in order to use it several times in turn with different starting + * points (trying to avoid being trapped in a local extremum when looking for a global one). <em>It + * is not a "user" class.</em> + * + * @param <PAIR> Type of the point/value pair returned by the optimization algorithm. + * @since 3.0 + */ +public abstract class BaseMultiStartMultivariateOptimizer<PAIR> + extends BaseMultivariateOptimizer<PAIR> { + /** Underlying classical optimizer. */ + private final BaseMultivariateOptimizer<PAIR> optimizer; + + /** Number of evaluations already performed for all starts. */ + private int totalEvaluations; + + /** Number of starts to go. */ + private int starts; + + /** Random generator for multi-start. */ + private RandomVectorGenerator generator; + + /** Optimization data. */ + private OptimizationData[] optimData; + + /** + * Location in {@link #optimData} where the updated maximum number of evaluations will be + * stored. 
+ */ + private int maxEvalIndex = -1; + + /** Location in {@link #optimData} where the updated start value will be stored. */ + private int initialGuessIndex = -1; + + /** + * Create a multi-start optimizer from a single-start optimizer. + * + * <p>Note that if there are bounds constraints (see {@link #getLowerBound()} and {@link + * #getUpperBound()}), then a simple rejection algorithm is used at each restart. This implies + * that the random vector generator should have a good probability to generate vectors in the + * bounded domain, otherwise the rejection algorithm will hit the {@link #getMaxEvaluations()} + * count without generating a proper restart point. Users must take great care of the <a + * href="http://en.wikipedia.org/wiki/Curse_of_dimensionality">curse of dimensionality</a>. + * + * @param optimizer Single-start optimizer to wrap. + * @param starts Number of starts to perform. If {@code starts == 1}, the {@link + * #optimize(OptimizationData[]) optimize} will return the same solution as the given {@code + * optimizer} would return. + * @param generator Random vector generator to use for restarts. + * @throws NotStrictlyPositiveException if {@code starts < 1}. + */ + public BaseMultiStartMultivariateOptimizer( + final BaseMultivariateOptimizer<PAIR> optimizer, + final int starts, + final RandomVectorGenerator generator) { + super(optimizer.getConvergenceChecker()); + + if (starts < 1) { + throw new NotStrictlyPositiveException(starts); + } + + this.optimizer = optimizer; + this.starts = starts; + this.generator = generator; + } + + /** {@inheritDoc} */ + @Override + public int getEvaluations() { + return totalEvaluations; + } + + /** + * Gets all the optima found during the last call to {@code optimize}. The optimizer stores all + * the optima found during a set of restarts. The {@code optimize} method returns the best point + * only. 
This method returns all the points found at the end of each start, including the best + * one already returned by the {@code optimize} method. <br> + * The returned array has one element for each start as specified in the constructor. It is + * ordered with the results from the runs that did converge first, sorted from best to worst + * objective value (i.e in ascending order if minimizing and in descending order if maximizing), + * followed by {@code null} elements corresponding to the runs that did not converge. This means + * all elements will be {@code null} if the {@code optimize} method did throw an exception. This + * also means that if the first element is not {@code null}, it is the best point found across + * all starts. <br> + * The behaviour is undefined if this method is called before {@code optimize}; it will likely + * throw {@code NullPointerException}. + * + * @return an array containing the optima sorted from best to worst. + */ + public abstract PAIR[] getOptima(); + + /** + * {@inheritDoc} + * + * @throws MathIllegalStateException if {@code optData} does not contain an instance of {@link + * MaxEval} or {@link InitialGuess}. + */ + @Override + public PAIR optimize(OptimizationData... optData) { + // Store arguments in order to pass them to the internal optimizer. + optimData = optData; + // Set up base class and perform computations. + return super.optimize(optData); + } + + /** {@inheritDoc} */ + @Override + protected PAIR doOptimize() { + // Remove all instances of "MaxEval" and "InitialGuess" from the + // array that will be passed to the internal optimizer. + // The former is to enforce smaller numbers of allowed evaluations + // (according to how many have been used up already), and the latter + // to impose a different start value for each start. 
+ for (int i = 0; i < optimData.length; i++) { + if (optimData[i] instanceof MaxEval) { + optimData[i] = null; + maxEvalIndex = i; + } + if (optimData[i] instanceof InitialGuess) { + optimData[i] = null; + initialGuessIndex = i; + continue; + } + } + if (maxEvalIndex == -1) { + throw new MathIllegalStateException(); + } + if (initialGuessIndex == -1) { + throw new MathIllegalStateException(); + } + + RuntimeException lastException = null; + totalEvaluations = 0; + clear(); + + final int maxEval = getMaxEvaluations(); + final double[] min = getLowerBound(); + final double[] max = getUpperBound(); + final double[] startPoint = getStartPoint(); + + // Multi-start loop. + for (int i = 0; i < starts; i++) { + // CHECKSTYLE: stop IllegalCatch + try { + // Decrease number of allowed evaluations. + optimData[maxEvalIndex] = new MaxEval(maxEval - totalEvaluations); + // New start value. + double[] s = null; + if (i == 0) { + s = startPoint; + } else { + int attempts = 0; + while (s == null) { + if (attempts++ >= getMaxEvaluations()) { + throw new TooManyEvaluationsException(getMaxEvaluations()); + } + s = generator.nextVector(); + for (int k = 0; s != null && k < s.length; ++k) { + if ((min != null && s[k] < min[k]) || (max != null && s[k] > max[k])) { + // reject the vector + s = null; + } + } + } + } + optimData[initialGuessIndex] = new InitialGuess(s); + // Optimize. + final PAIR result = optimizer.optimize(optimData); + store(result); + } catch (RuntimeException mue) { + lastException = mue; + } + // CHECKSTYLE: resume IllegalCatch + + totalEvaluations += optimizer.getEvaluations(); + } + + final PAIR[] optima = getOptima(); + if (optima.length == 0) { + // All runs failed. + throw lastException; // Cannot be null if starts >= 1. + } + + // Return the best optimum. + return optima[0]; + } + + /** + * Method that will be called in order to store each found optimum. + * + * @param optimum Result of an optimization run. 
+ */ + protected abstract void store(PAIR optimum); + + /** Method that will be called in order to clear all stored optima. */ + protected abstract void clear(); +} diff --git a/src/main/java/org/apache/commons/math3/optim/BaseMultivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/BaseMultivariateOptimizer.java new file mode 100644 index 0000000..e70ab8e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/BaseMultivariateOptimizer.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.NumberIsTooLargeException; +import org.apache.commons.math3.exception.NumberIsTooSmallException; + +/** + * Base class for implementing optimizers for multivariate functions. It contains the boiler-plate + * code for initial guess and bounds specifications. <em>It is not a "user" class.</em> + * + * @param <PAIR> Type of the point/value pair returned by the optimization algorithm. + * @since 3.1 + */ +public abstract class BaseMultivariateOptimizer<PAIR> extends BaseOptimizer<PAIR> { + /** Initial guess. 
*/ + private double[] start; + + /** Lower bounds. */ + private double[] lowerBound; + + /** Upper bounds. */ + private double[] upperBound; + + /** + * @param checker Convergence checker. + */ + protected BaseMultivariateOptimizer(ConvergenceChecker<PAIR> checker) { + super(checker); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in {@link + * BaseOptimizer#parseOptimizationData(OptimizationData[]) BaseOptimizer}, this method will + * register the following data: + * <ul> + * <li>{@link InitialGuess} + * <li>{@link SimpleBounds} + * </ul> + * + * @return {@inheritDoc} + */ + @Override + public PAIR optimize(OptimizationData... optData) { + // Perform optimization. + return super.optimize(optData); + } + + /** + * Scans the list of (required and optional) optimization data that characterize the problem. + * + * @param optData Optimization data. The following data will be looked for: + * <ul> + * <li>{@link InitialGuess} + * <li>{@link SimpleBounds} + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof InitialGuess) { + start = ((InitialGuess) data).getInitialGuess(); + continue; + } + if (data instanceof SimpleBounds) { + final SimpleBounds bounds = (SimpleBounds) data; + lowerBound = bounds.getLower(); + upperBound = bounds.getUpper(); + continue; + } + } + + // Check input consistency. + checkParameters(); + } + + /** + * Gets the initial guess. + * + * @return the initial guess, or {@code null} if not set. + */ + public double[] getStartPoint() { + return start == null ? null : start.clone(); + } + + /** + * @return the lower bounds, or {@code null} if not set. 
+ */ + public double[] getLowerBound() { + return lowerBound == null ? null : lowerBound.clone(); + } + + /** + * @return the upper bounds, or {@code null} if not set. + */ + public double[] getUpperBound() { + return upperBound == null ? null : upperBound.clone(); + } + + /** Check parameters consistency. */ + private void checkParameters() { + if (start != null) { + final int dim = start.length; + if (lowerBound != null) { + if (lowerBound.length != dim) { + throw new DimensionMismatchException(lowerBound.length, dim); + } + for (int i = 0; i < dim; i++) { + final double v = start[i]; + final double lo = lowerBound[i]; + if (v < lo) { + throw new NumberIsTooSmallException(v, lo, true); + } + } + } + if (upperBound != null) { + if (upperBound.length != dim) { + throw new DimensionMismatchException(upperBound.length, dim); + } + for (int i = 0; i < dim; i++) { + final double v = start[i]; + final double hi = upperBound[i]; + if (v > hi) { + throw new NumberIsTooLargeException(v, hi, true); + } + } + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/BaseOptimizer.java b/src/main/java/org/apache/commons/math3/optim/BaseOptimizer.java new file mode 100644 index 0000000..80f7527 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/BaseOptimizer.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.TooManyIterationsException; +import org.apache.commons.math3.util.Incrementor; + +/** + * Base class for implementing optimizers. It contains the boiler-plate code for counting the number + * of evaluations of the objective function and the number of iterations of the algorithm, and + * storing the convergence checker. <em>It is not a "user" class.</em> + * + * @param <PAIR> Type of the point/value pair returned by the optimization algorithm. + * @since 3.1 + */ +public abstract class BaseOptimizer<PAIR> { + /** Evaluations counter. */ + protected final Incrementor evaluations; + + /** Iterations counter. */ + protected final Incrementor iterations; + + /** Convergence checker. */ + private final ConvergenceChecker<PAIR> checker; + + /** + * @param checker Convergence checker. + */ + protected BaseOptimizer(ConvergenceChecker<PAIR> checker) { + this(checker, 0, Integer.MAX_VALUE); + } + + /** + * @param checker Convergence checker. + * @param maxEval Maximum number of objective function evaluations. + * @param maxIter Maximum number of algorithm iterations. + */ + protected BaseOptimizer(ConvergenceChecker<PAIR> checker, int maxEval, int maxIter) { + this.checker = checker; + + evaluations = new Incrementor(maxEval, new MaxEvalCallback()); + iterations = new Incrementor(maxIter, new MaxIterCallback()); + } + + /** + * Gets the maximal number of function evaluations. 
+ * + * @return the maximal number of function evaluations. + */ + public int getMaxEvaluations() { + return evaluations.getMaximalCount(); + } + + /** + * Gets the number of evaluations of the objective function. The number of evaluations + * corresponds to the last call to the {@code optimize} method. It is 0 if the method has not + * been called yet. + * + * @return the number of evaluations of the objective function. + */ + public int getEvaluations() { + return evaluations.getCount(); + } + + /** + * Gets the maximal number of iterations. + * + * @return the maximal number of iterations. + */ + public int getMaxIterations() { + return iterations.getMaximalCount(); + } + + /** + * Gets the number of iterations performed by the algorithm. The number of iterations corresponds + * to the last call to the {@code optimize} method. It is 0 if the method has not been called + * yet. + * + * @return the number of iterations performed by the algorithm. + */ + public int getIterations() { + return iterations.getCount(); + } + + /** + * Gets the convergence checker. + * + * @return the object used to check for convergence. + */ + public ConvergenceChecker<PAIR> getConvergenceChecker() { + return checker; + } + + /** + * Stores data and performs the optimization. + * + * <p>The list of parameters is open-ended so that sub-classes can extend it with arguments + * specific to their concrete implementations. + * + * <p>When the method is called multiple times, instance data is overwritten only when actually + * present in the list of arguments: when not specified, data set in a previous call is retained + * (and thus is optional in subsequent calls). + * + * <p>Important note: Subclasses <em>must</em> override {@link + * #parseOptimizationData(OptimizationData[])} if they need to register their own options; but + * then, they <em>must</em> also call {@code super.parseOptimizationData(optData)} within that + * method. + * + * @param optData Optimization data. 
This method will register the following data: + * <ul> + * <li>{@link MaxEval} + * <li>{@link MaxIter} + * </ul> + * + * @return a point/value pair that satisfies the convergence criteria. + * @throws TooManyEvaluationsException if the maximal number of evaluations is exceeded. + * @throws TooManyIterationsException if the maximal number of iterations is exceeded. + */ + public PAIR optimize(OptimizationData... optData) + throws TooManyEvaluationsException, TooManyIterationsException { + // Parse options. + parseOptimizationData(optData); + + // Reset counters. + evaluations.resetCount(); + iterations.resetCount(); + // Perform optimization. + return doOptimize(); + } + + /** + * Performs the optimization. + * + * @return a point/value pair that satisfies the convergence criteria. + * @throws TooManyEvaluationsException if the maximal number of evaluations is exceeded. + * @throws TooManyIterationsException if the maximal number of iterations is exceeded. + */ + public PAIR optimize() throws TooManyEvaluationsException, TooManyIterationsException { + // Reset counters. + evaluations.resetCount(); + iterations.resetCount(); + // Perform optimization. + return doOptimize(); + } + + /** + * Performs the bulk of the optimization algorithm. + * + * @return the point/value pair giving the optimal value of the objective function. + */ + protected abstract PAIR doOptimize(); + + /** + * Increment the evaluation count. + * + * @throws TooManyEvaluationsException if the allowed evaluations have been exhausted. + */ + protected void incrementEvaluationCount() throws TooManyEvaluationsException { + evaluations.incrementCount(); + } + + /** + * Increment the iteration count. + * + * @throws TooManyIterationsException if the allowed iterations have been exhausted. 
+ */ + protected void incrementIterationCount() throws TooManyIterationsException { + iterations.incrementCount(); + } + + /** + * Scans the list of (required and optional) optimization data that characterize the problem. + * + * @param optData Optimization data. The following data will be looked for: + * <ul> + * <li>{@link MaxEval} + * <li>{@link MaxIter} + * </ul> + */ + protected void parseOptimizationData(OptimizationData... optData) { + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof MaxEval) { + evaluations.setMaximalCount(((MaxEval) data).getMaxEval()); + continue; + } + if (data instanceof MaxIter) { + iterations.setMaximalCount(((MaxIter) data).getMaxIter()); + continue; + } + } + } + + /** Defines the action to perform when reaching the maximum number of evaluations. */ + private static class MaxEvalCallback implements Incrementor.MaxCountExceededCallback { + /** + * {@inheritDoc} + * + * @throws TooManyEvaluationsException + */ + public void trigger(int max) { + throw new TooManyEvaluationsException(max); + } + } + + /** Defines the action to perform when reaching the maximum number of iterations. */ + private static class MaxIterCallback implements Incrementor.MaxCountExceededCallback { + /** + * {@inheritDoc} + * + * @throws TooManyIterationsException + */ + public void trigger(int max) { + throw new TooManyIterationsException(max); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/ConvergenceChecker.java b/src/main/java/org/apache/commons/math3/optim/ConvergenceChecker.java new file mode 100644 index 0000000..8064560 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/ConvergenceChecker.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim; + +/** + * This interface specifies how to check if an optimization algorithm has converged. <br> + * Deciding if convergence has been reached is a problem-dependent issue. The user should provide a + * class implementing this interface to allow the optimization algorithm to stop its search + * according to the problem at hand. <br> + * For convenience, three implementations that fit simple needs are already provided: {@link + * SimpleValueChecker}, {@link SimpleVectorValueChecker} and {@link SimplePointChecker}. The first + * two consider that convergence is reached when the objective function value does not change much + * anymore, it does not use the point set at all. The third one considers that convergence is + * reached when the input point set does not change much anymore, it does not use objective function + * value at all. + * + * @param <PAIR> Type of the (point, objective value) pair. + * @see org.apache.commons.math3.optim.SimplePointChecker + * @see org.apache.commons.math3.optim.SimpleValueChecker + * @see org.apache.commons.math3.optim.SimpleVectorValueChecker + * @since 3.0 + */ +public interface ConvergenceChecker<PAIR> { + /** + * Check if the optimization algorithm has converged. 
+ * + * @param iteration Current iteration. + * @param previous Best point in the previous iteration. + * @param current Best point in the current iteration. + * @return {@code true} if the algorithm is considered to have converged. + */ + boolean converged(int iteration, PAIR previous, PAIR current); +} diff --git a/src/main/java/org/apache/commons/math3/optim/InitialGuess.java b/src/main/java/org/apache/commons/math3/optim/InitialGuess.java new file mode 100644 index 0000000..9323c6f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/InitialGuess.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim; + +/** + * Starting point (first guess) of the optimization procedure. <br> + * Immutable class. + * + * @since 3.1 + */ +public class InitialGuess implements OptimizationData { + /** Initial guess. */ + private final double[] init; + + /** + * @param startPoint Initial guess. + */ + public InitialGuess(double[] startPoint) { + init = startPoint.clone(); + } + + /** + * Gets the initial guess. + * + * @return the initial guess. 
+ */ + public double[] getInitialGuess() { + return init.clone(); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/MaxEval.java b/src/main/java/org/apache/commons/math3/optim/MaxEval.java new file mode 100644 index 0000000..3c6478e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/MaxEval.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; + +/** + * Maximum number of evaluations of the function to be optimized. + * + * @since 3.1 + */ +public class MaxEval implements OptimizationData { + /** Allowed number of evaluations. */ + private final int maxEval; + + /** + * @param max Allowed number of evaluations. + * @throws NotStrictlyPositiveException if {@code max <= 0}. + */ + public MaxEval(int max) { + if (max <= 0) { + throw new NotStrictlyPositiveException(max); + } + + maxEval = max; + } + + /** + * Gets the maximum number of evaluations. + * + * @return the allowed number of evaluations. 
+ */ + public int getMaxEval() { + return maxEval; + } + + /** + * Factory method that creates an instance of this class that represents a virtually unlimited + * number of evaluations. + * + * @return a new instance suitable for allowing {@link Integer#MAX_VALUE} evaluations. + */ + public static MaxEval unlimited() { + return new MaxEval(Integer.MAX_VALUE); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/MaxIter.java b/src/main/java/org/apache/commons/math3/optim/MaxIter.java new file mode 100644 index 0000000..dc9c917 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/MaxIter.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; + +/** + * Maximum number of iterations performed by an (iterative) algorithm. + * + * @since 3.1 + */ +public class MaxIter implements OptimizationData { + /** Allowed number of iterations. */ + private final int maxIter; + + /** + * @param max Allowed number of iterations. + * @throws NotStrictlyPositiveException if {@code max <= 0}.
+ */ + public MaxIter(int max) { + if (max <= 0) { + throw new NotStrictlyPositiveException(max); + } + + maxIter = max; + } + + /** + * Gets the maximum number of iterations. + * + * @return the allowed number of iterations. + */ + public int getMaxIter() { + return maxIter; + } + + /** + * Factory method that creates an instance of this class that represents a virtually unlimited + * number of iterations. + * + * @return a new instance suitable for allowing {@link Integer#MAX_VALUE} iterations. + */ + public static MaxIter unlimited() { + return new MaxIter(Integer.MAX_VALUE); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/OptimizationData.java b/src/main/java/org/apache/commons/math3/optim/OptimizationData.java new file mode 100644 index 0000000..4f55472 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/OptimizationData.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +/** + * Marker interface.
Implementations will provide functionality (optional or required) needed by the + * optimizers, and those will need to check the actual type of the arguments and perform the + * appropriate cast in order to access the data they need. + * + * @since 3.1 + */ +public interface OptimizationData {} diff --git a/src/main/java/org/apache/commons/math3/optim/OptimizationProblem.java b/src/main/java/org/apache/commons/math3/optim/OptimizationProblem.java new file mode 100644 index 0000000..1b77117 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/OptimizationProblem.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.util.Incrementor; + +/** + * Common settings for all optimization problems. Includes divergence and convergence criteria. + * + * @param <PAIR> The type of value the {@link #getConvergenceChecker() convergence checker} will + * operate on. It should include the value of the model function and point where it was + * evaluated. 
+ * @since 3.3 + */ +public interface OptimizationProblem<PAIR> { + /** + * Get an independent Incrementor that counts up to the maximum number of evaluations and then + * throws an exception. + * + * @return a counter for the evaluations. + */ + Incrementor getEvaluationCounter(); + + /** + * Get an independent Incrementor that counts up to the maximum number of iterations and then + * throws an exception. + * + * @return a counter for the iterations. + */ + Incrementor getIterationCounter(); + + /** + * Gets the convergence checker. + * + * @return the object used to check for convergence. + */ + ConvergenceChecker<PAIR> getConvergenceChecker(); +} diff --git a/src/main/java/org/apache/commons/math3/optim/PointValuePair.java b/src/main/java/org/apache/commons/math3/optim/PointValuePair.java new file mode 100644 index 0000000..8f7cc01 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/PointValuePair.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.util.Pair; + +import java.io.Serializable; + +/** + * This class holds a point and the value of an objective function at that point.
+ * + * @see PointVectorValuePair + * @see org.apache.commons.math3.analysis.MultivariateFunction + * @since 3.0 + */ +public class PointValuePair extends Pair<double[], Double> implements Serializable { + /** Serializable UID. */ + private static final long serialVersionUID = 20120513L; + + /** + * Builds a point/objective function value pair. + * + * @param point Point coordinates. This instance will store a copy of the array, not the array + * passed as argument. + * @param value Value of the objective function at the point. + */ + public PointValuePair(final double[] point, final double value) { + this(point, value, true); + } + + /** + * Builds a point/objective function value pair. + * + * @param point Point coordinates. + * @param value Value of the objective function at the point. + * @param copyArray if {@code true}, the input array will be copied, otherwise it will be + * referenced. + */ + public PointValuePair(final double[] point, final double value, final boolean copyArray) { + super(copyArray ? ((point == null) ? null : point.clone()) : point, value); + } + + /** + * Gets the point. + * + * @return a copy of the stored point. + */ + public double[] getPoint() { + final double[] p = getKey(); + return p == null ? null : p.clone(); + } + + /** + * Gets a reference to the point. + * + * @return a reference to the internal array storing the point. + */ + public double[] getPointRef() { + return getKey(); + } + + /** + * Replace the instance with a data transfer object for serialization. + * + * @return data transfer object that will be serialized + */ + private Object writeReplace() { + return new DataTransferObject(getKey(), getValue()); + } + + /** Internal class used only for serialization. */ + private static class DataTransferObject implements Serializable { + /** Serializable UID. */ + private static final long serialVersionUID = 20120513L; + + /** Point coordinates. 
@Serial */ + private final double[] point; + + /** Value of the objective function at the point. @Serial */ + private final double value; + + /** + * Simple constructor. + * + * @param point Point coordinates. + * @param value Value of the objective function at the point. + */ + DataTransferObject(final double[] point, final double value) { + this.point = point.clone(); + this.value = value; + } + + /** + * Replace the deserialized data transfer object with a {@link PointValuePair}. + * + * @return replacement {@link PointValuePair} + */ + private Object readResolve() { + return new PointValuePair(point, value, false); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/PointVectorValuePair.java b/src/main/java/org/apache/commons/math3/optim/PointVectorValuePair.java new file mode 100644 index 0000000..c0ba93e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/PointVectorValuePair.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.util.Pair; + +import java.io.Serializable; + +/** + * This class holds a point and the vectorial value of an objective function at that point. 
+ * + * @see PointValuePair + * @see org.apache.commons.math3.analysis.MultivariateVectorFunction + * @since 3.0 + */ +public class PointVectorValuePair extends Pair<double[], double[]> implements Serializable { + /** Serializable UID. */ + private static final long serialVersionUID = 20120513L; + + /** + * Builds a point/objective function value pair. + * + * @param point Point coordinates. This instance will store a copy of the array, not the array + * passed as argument. + * @param value Value of the objective function at the point. + */ + public PointVectorValuePair(final double[] point, final double[] value) { + this(point, value, true); + } + + /** + * Build a point/objective function value pair. + * + * @param point Point coordinates. + * @param value Value of the objective function at the point. + * @param copyArray if {@code true}, the input arrays will be copied, otherwise they will be + * referenced. + */ + public PointVectorValuePair( + final double[] point, final double[] value, final boolean copyArray) { + super( + copyArray ? ((point == null) ? null : point.clone()) : point, + copyArray ? ((value == null) ? null : value.clone()) : value); + } + + /** + * Gets the point. + * + * @return a copy of the stored point. + */ + public double[] getPoint() { + final double[] p = getKey(); + return p == null ? null : p.clone(); + } + + /** + * Gets a reference to the point. + * + * @return a reference to the internal array storing the point. + */ + public double[] getPointRef() { + return getKey(); + } + + /** + * Gets the value of the objective function. + * + * @return a copy of the stored value of the objective function. + */ + @Override + public double[] getValue() { + final double[] v = super.getValue(); + return v == null ? null : v.clone(); + } + + /** + * Gets a reference to the value of the objective function. + * + * @return a reference to the internal array storing the value of the objective function. 
+ */ + public double[] getValueRef() { + return super.getValue(); + } + + /** + * Replace the instance with a data transfer object for serialization. + * + * @return data transfer object that will be serialized + */ + private Object writeReplace() { + return new DataTransferObject(getKey(), getValue()); + } + + /** Internal class used only for serialization. */ + private static class DataTransferObject implements Serializable { + /** Serializable UID. */ + private static final long serialVersionUID = 20120513L; + + /** Point coordinates. @Serial */ + private final double[] point; + + /** Value of the objective function at the point. @Serial */ + private final double[] value; + + /** + * Simple constructor. + * + * @param point Point coordinates. + * @param value Value of the objective function at the point. + */ + DataTransferObject(final double[] point, final double[] value) { + this.point = point.clone(); + this.value = value.clone(); + } + + /** + * Replace the deserialized data transfer object with a {@link PointVectorValuePair}. + * + * @return replacement {@link PointVectorValuePair} + */ + private Object readResolve() { + return new PointVectorValuePair(point, value, false); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/SimpleBounds.java b/src/main/java/org/apache/commons/math3/optim/SimpleBounds.java new file mode 100644 index 0000000..8fffb25 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/SimpleBounds.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import java.util.Arrays; + +/** + * Simple optimization constraints: lower and upper bounds. The valid range of the parameters is an + * interval that can be infinite (in one or both directions). <br> + * Immutable class. + * + * @since 3.1 + */ +public class SimpleBounds implements OptimizationData { + /** Lower bounds. */ + private final double[] lower; + + /** Upper bounds. */ + private final double[] upper; + + /** + * @param lB Lower bounds. + * @param uB Upper bounds. + */ + public SimpleBounds(double[] lB, double[] uB) { + lower = lB.clone(); + upper = uB.clone(); + } + + /** + * Gets the lower bounds. + * + * @return the lower bounds. + */ + public double[] getLower() { + return lower.clone(); + } + + /** + * Gets the upper bounds. + * + * @return the upper bounds. + */ + public double[] getUpper() { + return upper.clone(); + } + + /** + * Factory method that creates instance of this class that represents unbounded ranges. + * + * @param dim Number of parameters. + * @return a new instance suitable for passing to an optimizer that requires bounds + * specification. 
+ */ + public static SimpleBounds unbounded(int dim) { + final double[] lB = new double[dim]; + Arrays.fill(lB, Double.NEGATIVE_INFINITY); + final double[] uB = new double[dim]; + Arrays.fill(uB, Double.POSITIVE_INFINITY); + + return new SimpleBounds(lB, uB); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/SimplePointChecker.java b/src/main/java/org/apache/commons/math3/optim/SimplePointChecker.java new file mode 100644 index 0000000..f831de6 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/SimplePointChecker.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.Pair; + +/** + * Simple implementation of the {@link ConvergenceChecker} interface using only point coordinates. + * + * <p>Convergence is considered to have been reached if, for each point coordinate, either the + * relative difference between the previous and current values is not larger than a threshold or + * the absolute difference between them is not larger than another threshold.
<br> + * The {@link #converged(int,Pair,Pair) converged} method will also return {@code true} if the + * number of iterations has been set (see {@link #SimplePointChecker(double,double,int) this + * constructor}). + * + * @param <PAIR> Type of the (point, value) pair. The type of the "value" part of the pair (not used + * by this class). + * @since 3.0 + */ +public class SimplePointChecker<PAIR extends Pair<double[], ? extends Object>> + extends AbstractConvergenceChecker<PAIR> { + /** + * If {@link #maxIterationCount} is set to this value, the number of iterations will never cause + * {@link #converged(int, Pair, Pair)} to return {@code true}. + */ + private static final int ITERATION_CHECK_DISABLED = -1; + + /** + * Number of iterations after which the {@link #converged(int, Pair, Pair)} method will return + * true (unless the check is disabled). + */ + private final int maxIterationCount; + + /** + * Build an instance with specified thresholds. In order to perform only relative checks, the + * absolute tolerance must be set to a negative value. In order to perform only absolute checks, + * the relative tolerance must be set to a negative value. + * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + */ + public SimplePointChecker(final double relativeThreshold, final double absoluteThreshold) { + super(relativeThreshold, absoluteThreshold); + maxIterationCount = ITERATION_CHECK_DISABLED; + } + + /** + * Builds an instance with specified thresholds. In order to perform only relative checks, the + * absolute tolerance must be set to a negative value. In order to perform only absolute checks, + * the relative tolerance must be set to a negative value. + * + * @param relativeThreshold Relative tolerance threshold. + * @param absoluteThreshold Absolute tolerance threshold. + * @param maxIter Maximum iteration count. + * @throws NotStrictlyPositiveException if {@code maxIter <= 0}. 
+ * @since 3.1 + */ + public SimplePointChecker( + final double relativeThreshold, final double absoluteThreshold, final int maxIter) { + super(relativeThreshold, absoluteThreshold); + + if (maxIter <= 0) { + throw new NotStrictlyPositiveException(maxIter); + } + maxIterationCount = maxIter; + } + + /** + * Check if the optimization algorithm has converged considering the last two points. This + * method may be called several times from the same algorithm iteration with different points. + * This can be detected by checking the iteration number at each call if needed. Each time this + * method is called, the previous and current point correspond to points with the same role at + * each iteration, so they can be compared. As an example, simplex-based algorithms call this + * method for all points of the simplex, not only for the best or worst ones. + * + * @param iteration Index of current iteration + * @param previous Best point in the previous iteration. + * @param current Best point in the current iteration. + * @return {@code true} if the arguments satisfy the convergence criterion.
+ */ + @Override + public boolean converged(final int iteration, final PAIR previous, final PAIR current) { + if (maxIterationCount != ITERATION_CHECK_DISABLED && iteration >= maxIterationCount) { + return true; + } + + final double[] p = previous.getKey(); + final double[] c = current.getKey(); + for (int i = 0; i < p.length; ++i) { + final double pi = p[i]; + final double ci = c[i]; + final double difference = FastMath.abs(pi - ci); + final double size = FastMath.max(FastMath.abs(pi), FastMath.abs(ci)); + if (difference > size * getRelativeThreshold() && difference > getAbsoluteThreshold()) { + return false; + } + } + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/SimpleValueChecker.java b/src/main/java/org/apache/commons/math3/optim/SimpleValueChecker.java new file mode 100644 index 0000000..968fc6e --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/SimpleValueChecker.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.util.FastMath; + +/** + * Simple implementation of the {@link ConvergenceChecker} interface using only objective function + * values. + * + * <p>Convergence is considered to have been reached if either the relative difference between the + * objective function values is not larger than a threshold or the absolute difference between the + * objective function values is not larger than another threshold. <br> + * The {@link #converged(int,PointValuePair,PointValuePair) converged} method will also return + * {@code true} once the iteration number reaches the maximum iteration count, if one has been set + * (see {@link #SimpleValueChecker(double,double,int) this constructor}). + * + * @since 3.0 + */ +public class SimpleValueChecker extends AbstractConvergenceChecker<PointValuePair> { + /** + * If {@link #maxIterationCount} is set to this value, the number of iterations will never cause + * {@link #converged(int,PointValuePair,PointValuePair)} to return {@code true}. + */ + private static final int ITERATION_CHECK_DISABLED = -1; + + /** + * Number of iterations after which the {@link #converged(int,PointValuePair,PointValuePair)} + * method will return true (unless the check is disabled). + */ + private final int maxIterationCount; + + /** + * Build an instance with specified thresholds. + * + * <p>In order to perform only relative checks, the absolute tolerance must be set to a negative + * value. In order to perform only absolute checks, the relative tolerance must be set to a + * negative value.
+ * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + */ + public SimpleValueChecker(final double relativeThreshold, final double absoluteThreshold) { + super(relativeThreshold, absoluteThreshold); + maxIterationCount = ITERATION_CHECK_DISABLED; + } + + /** + * Builds an instance with specified thresholds. + * + * <p>In order to perform only relative checks, the absolute tolerance must be set to a negative + * value. In order to perform only absolute checks, the relative tolerance must be set to a + * negative value. + * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + * @param maxIter Maximum iteration count. + * @throws NotStrictlyPositiveException if {@code maxIter <= 0}. + * @since 3.1 + */ + public SimpleValueChecker( + final double relativeThreshold, final double absoluteThreshold, final int maxIter) { + super(relativeThreshold, absoluteThreshold); + + if (maxIter <= 0) { + throw new NotStrictlyPositiveException(maxIter); + } + maxIterationCount = maxIter; + } + + /** + * Check if the optimization algorithm has converged considering the last two points. This + * method may be called several times from the same algorithm iteration with different points. + * This can be detected by checking the iteration number at each call if needed. Each time this + * method is called, the previous and current point correspond to points with the same role at + * each iteration, so they can be compared. As an example, simplex-based algorithms call this + * method for all points of the simplex, not only for the best or worst ones. + * + * @param iteration Index of current iteration + * @param previous Best point in the previous iteration. + * @param current Best point in the current iteration. + * @return {@code true} if the algorithm has converged.
+ */ + @Override + public boolean converged( + final int iteration, final PointValuePair previous, final PointValuePair current) { + if (maxIterationCount != ITERATION_CHECK_DISABLED && iteration >= maxIterationCount) { + return true; + } + + final double p = previous.getValue(); + final double c = current.getValue(); + final double difference = FastMath.abs(p - c); + final double size = FastMath.max(FastMath.abs(p), FastMath.abs(c)); + return difference <= size * getRelativeThreshold() || difference <= getAbsoluteThreshold(); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/SimpleVectorValueChecker.java b/src/main/java/org/apache/commons/math3/optim/SimpleVectorValueChecker.java new file mode 100644 index 0000000..0327bd1 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/SimpleVectorValueChecker.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim; + +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.util.FastMath; + +/** + * Simple implementation of the {@link ConvergenceChecker} interface using only objective function + * values. 
+ * + * <p>Convergence is considered to have been reached if either the relative difference between the + * objective function values is smaller than a threshold or if either the absolute difference + * between the objective function values is smaller than another threshold for all vectors elements. + * <br> + * The {@link #converged(int,PointVectorValuePair,PointVectorValuePair) converged} method will also + * return {@code true} if the number of iterations has been set (see {@link + * #SimpleVectorValueChecker(double,double,int) this constructor}). + * + * @since 3.0 + */ +public class SimpleVectorValueChecker extends AbstractConvergenceChecker<PointVectorValuePair> { + /** + * If {@link #maxIterationCount} is set to this value, the number of iterations will never cause + * {@link #converged(int,PointVectorValuePair,PointVectorValuePair)} to return {@code true}. + */ + private static final int ITERATION_CHECK_DISABLED = -1; + + /** + * Number of iterations after which the {@link + * #converged(int,PointVectorValuePair,PointVectorValuePair)} method will return true (unless + * the check is disabled). + */ + private final int maxIterationCount; + + /** + * Build an instance with specified thresholds. + * + * <p>In order to perform only relative checks, the absolute tolerance must be set to a negative + * value. In order to perform only absolute checks, the relative tolerance must be set to a + * negative value. + * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + */ + public SimpleVectorValueChecker( + final double relativeThreshold, final double absoluteThreshold) { + super(relativeThreshold, absoluteThreshold); + maxIterationCount = ITERATION_CHECK_DISABLED; + } + + /** + * Builds an instance with specified tolerance thresholds and iteration count. + * + * <p>In order to perform only relative checks, the absolute tolerance must be set to a negative + * value. 
In order to perform only absolute checks, the relative tolerance must be set to a + * negative value. + * + * @param relativeThreshold Relative tolerance threshold. + * @param absoluteThreshold Absolute tolerance threshold. + * @param maxIter Maximum iteration count. + * @throws NotStrictlyPositiveException if {@code maxIter <= 0}. + * @since 3.1 + */ + public SimpleVectorValueChecker( + final double relativeThreshold, final double absoluteThreshold, final int maxIter) { + super(relativeThreshold, absoluteThreshold); + + if (maxIter <= 0) { + throw new NotStrictlyPositiveException(maxIter); + } + maxIterationCount = maxIter; + } + + /** + * Check if the optimization algorithm has converged considering the last two points. This + * method may be called several times from the same algorithm iteration with different points. + * This can be detected by checking the iteration number at each call if needed. Each time this + * method is called, the previous and current point correspond to points with the same role at + * each iteration, so they can be compared. As an example, simplex-based algorithms call this + * method for all points of the simplex, not only for the best or worst ones. + * + * @param iteration Index of current iteration + * @param previous Best point in the previous iteration. + * @param current Best point in the current iteration. + * @return {@code true} if the arguments satisfy the convergence criterion.
+ */ + @Override + public boolean converged( + final int iteration, + final PointVectorValuePair previous, + final PointVectorValuePair current) { + if (maxIterationCount != ITERATION_CHECK_DISABLED && iteration >= maxIterationCount) { + return true; + } + + final double[] p = previous.getValueRef(); + final double[] c = current.getValueRef(); + for (int i = 0; i < p.length; ++i) { + final double pi = p[i]; + final double ci = c[i]; + final double difference = FastMath.abs(pi - ci); + final double size = FastMath.max(FastMath.abs(pi), FastMath.abs(ci)); + if (difference > size * getRelativeThreshold() && difference > getAbsoluteThreshold()) { + return false; + } + } + return true; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraint.java b/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraint.java new file mode 100644 index 0000000..b9fa390 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraint.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.linear; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.linear.ArrayRealVector; + +/** + * A linear constraint for a linear optimization problem. + * <p> + * A linear constraint has one of the forms: + * <ul> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> = v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> <= v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> >= v</li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> = + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> <= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> >= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * </ul> + * The c<sub>i</sub>, l<sub>i</sub> or r<sub>i</sub> are the coefficients of the constraints, the x<sub>i</sub> + * are the coordinates of the current point and v is the value of the constraint. + * </p> + * + * @since 2.0 + */ +public class LinearConstraint implements Serializable { + /** Serializable version identifier. */ + private static final long serialVersionUID = -764632794033034092L; + /** Coefficients of the constraint (left hand side). */ + private final transient RealVector coefficients; + /** Relationship between left and right hand sides (=, <=, >=). */ + private final Relationship relationship; + /** Value of the constraint (right hand side). 
*/ + private final double value; + + /** + * Build a constraint involving a single linear equation. + * <p> + * A linear constraint with a single linear equation has one of the forms: + * <ul> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> = v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> <= v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> >= v</li> + * </ul> + * </p> + * @param coefficients The coefficients of the constraint (left hand side) + * @param relationship The type of (in)equality used in the constraint + * @param value The value of the constraint (right hand side) + */ + public LinearConstraint(final double[] coefficients, + final Relationship relationship, + final double value) { + this(new ArrayRealVector(coefficients), relationship, value); + } + + /** + * Build a constraint involving a single linear equation. + * <p> + * A linear constraint with a single linear equation has one of the forms: + * <ul> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> = v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> <= v</li> + * <li>c<sub>1</sub>x<sub>1</sub> + ... c<sub>n</sub>x<sub>n</sub> >= v</li> + * </ul> + * </p> + * @param coefficients The coefficients of the constraint (left hand side) + * @param relationship The type of (in)equality used in the constraint + * @param value The value of the constraint (right hand side) + */ + public LinearConstraint(final RealVector coefficients, + final Relationship relationship, + final double value) { + this.coefficients = coefficients; + this.relationship = relationship; + this.value = value; + } + + /** + * Build a constraint involving two linear equations. + * <p> + * A linear constraint with two linear equation has one of the forms: + * <ul> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> = + * r<sub>1</sub>x<sub>1</sub> + ... 
r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> <= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> >= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * </ul> + * </p> + * @param lhsCoefficients The coefficients of the linear expression on the left hand side of the constraint + * @param lhsConstant The constant term of the linear expression on the left hand side of the constraint + * @param relationship The type of (in)equality used in the constraint + * @param rhsCoefficients The coefficients of the linear expression on the right hand side of the constraint + * @param rhsConstant The constant term of the linear expression on the right hand side of the constraint + */ + public LinearConstraint(final double[] lhsCoefficients, final double lhsConstant, + final Relationship relationship, + final double[] rhsCoefficients, final double rhsConstant) { + double[] sub = new double[lhsCoefficients.length]; + for (int i = 0; i < sub.length; ++i) { + sub[i] = lhsCoefficients[i] - rhsCoefficients[i]; + } + this.coefficients = new ArrayRealVector(sub, false); + this.relationship = relationship; + this.value = rhsConstant - lhsConstant; + } + + /** + * Build a constraint involving two linear equations. + * <p> + * A linear constraint with two linear equation has one of the forms: + * <ul> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> = + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> <= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * <li>l<sub>1</sub>x<sub>1</sub> + ... 
l<sub>n</sub>x<sub>n</sub> + l<sub>cst</sub> >= + * r<sub>1</sub>x<sub>1</sub> + ... r<sub>n</sub>x<sub>n</sub> + r<sub>cst</sub></li> + * </ul> + * </p> + * @param lhsCoefficients The coefficients of the linear expression on the left hand side of the constraint + * @param lhsConstant The constant term of the linear expression on the left hand side of the constraint + * @param relationship The type of (in)equality used in the constraint + * @param rhsCoefficients The coefficients of the linear expression on the right hand side of the constraint + * @param rhsConstant The constant term of the linear expression on the right hand side of the constraint + */ + public LinearConstraint(final RealVector lhsCoefficients, final double lhsConstant, + final Relationship relationship, + final RealVector rhsCoefficients, final double rhsConstant) { + this.coefficients = lhsCoefficients.subtract(rhsCoefficients); + this.relationship = relationship; + this.value = rhsConstant - lhsConstant; + } + + /** + * Gets the coefficients of the constraint (left hand side). + * + * @return the coefficients of the constraint (left hand side). + */ + public RealVector getCoefficients() { + return coefficients; + } + + /** + * Gets the relationship between left and right hand sides. + * + * @return the relationship between left and right hand sides. + */ + public Relationship getRelationship() { + return relationship; + } + + /** + * Gets the value of the constraint (right hand side). + * + * @return the value of the constraint (right hand side). 
+ */ + public double getValue() { + return value; + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other instanceof LinearConstraint) { + LinearConstraint rhs = (LinearConstraint) other; + return relationship == rhs.relationship && + value == rhs.value && + coefficients.equals(rhs.coefficients); + } + return false; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + return relationship.hashCode() ^ + Double.valueOf(value).hashCode() ^ + coefficients.hashCode(); + } + + /** + * Serialize the instance. + * @param oos stream where object should be written + * @throws IOException if object cannot be written to stream + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + MatrixUtils.serializeRealVector(coefficients, oos); + } + + /** + * Deserialize the instance. + * @param ois stream from which the object should be read + * @throws ClassNotFoundException if a class in the stream cannot be found + * @throws IOException if object cannot be read from the stream + */ + private void readObject(ObjectInputStream ois) + throws ClassNotFoundException, IOException { + ois.defaultReadObject(); + MatrixUtils.deserializeRealVector(this, "coefficients", ois); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraintSet.java b/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraintSet.java new file mode 100644 index 0000000..d54bd61 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/LinearConstraintSet.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.linear; + +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.Collection; +import java.util.Collections; + +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Class that represents a set of {@link LinearConstraint linear constraints}. + * + * @since 3.1 + */ +public class LinearConstraintSet implements OptimizationData { + /** Set of constraints. */ + private final Set<LinearConstraint> linearConstraints = new LinkedHashSet<LinearConstraint>(); + + /** + * Creates a set containing the given constraints. + * + * @param constraints Constraints. + */ + public LinearConstraintSet(LinearConstraint... constraints) { + for (LinearConstraint c : constraints) { + linearConstraints.add(c); + } + } + + /** + * Creates a set containing the given constraints. + * + * @param constraints Constraints. + */ + public LinearConstraintSet(Collection<LinearConstraint> constraints) { + linearConstraints.addAll(constraints); + } + + /** + * Gets the set of linear constraints. + * + * @return the constraints. 
+ */ + public Collection<LinearConstraint> getConstraints() { + return Collections.unmodifiableSet(linearConstraints); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/LinearObjectiveFunction.java b/src/main/java/org/apache/commons/math3/optim/linear/LinearObjectiveFunction.java new file mode 100644 index 0000000..6cff81f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/LinearObjectiveFunction.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.linear; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * An objective function for a linear optimization problem. + * <p> + * A linear objective function has one the form: + * <pre> + * c<sub>1</sub>x<sub>1</sub> + ... 
c<sub>n</sub>x<sub>n</sub> + d + * </pre> + * The c<sub>i</sub> and d are the coefficients of the equation, + * the x<sub>i</sub> are the coordinates of the current point. + * </p> + * + * @since 2.0 + */ +public class LinearObjectiveFunction + implements MultivariateFunction, + OptimizationData, + Serializable { + /** Serializable version identifier. */ + private static final long serialVersionUID = -4531815507568396090L; + /** Coefficients of the linear equation (c<sub>i</sub>). */ + private final transient RealVector coefficients; + /** Constant term of the linear equation. */ + private final double constantTerm; + + /** + * @param coefficients Coefficients for the linear equation being optimized. + * @param constantTerm Constant term of the linear equation. + */ + public LinearObjectiveFunction(double[] coefficients, double constantTerm) { + this(new ArrayRealVector(coefficients), constantTerm); + } + + /** + * @param coefficients Coefficients for the linear equation being optimized. + * @param constantTerm Constant term of the linear equation. + */ + public LinearObjectiveFunction(RealVector coefficients, double constantTerm) { + this.coefficients = coefficients; + this.constantTerm = constantTerm; + } + + /** + * Gets the coefficients of the linear equation being optimized. + * + * @return coefficients of the linear equation being optimized. + */ + public RealVector getCoefficients() { + return coefficients; + } + + /** + * Gets the constant of the linear equation being optimized. + * + * @return constant of the linear equation being optimized. + */ + public double getConstantTerm() { + return constantTerm; + } + + /** + * Computes the value of the linear equation at the current point. + * + * @param point Point at which linear equation must be evaluated. + * @return the value of the linear equation at the current point. 
+ */ + public double value(final double[] point) { + return value(new ArrayRealVector(point, false)); + } + + /** + * Computes the value of the linear equation at the current point. + * + * @param point Point at which linear equation must be evaluated. + * @return the value of the linear equation at the current point. + */ + public double value(final RealVector point) { + return coefficients.dotProduct(point) + constantTerm; + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other instanceof LinearObjectiveFunction) { + LinearObjectiveFunction rhs = (LinearObjectiveFunction) other; + return (constantTerm == rhs.constantTerm) && coefficients.equals(rhs.coefficients); + } + + return false; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + return Double.valueOf(constantTerm).hashCode() ^ coefficients.hashCode(); + } + + /** + * Serialize the instance. + * @param oos stream where object should be written + * @throws IOException if object cannot be written to stream + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + MatrixUtils.serializeRealVector(coefficients, oos); + } + + /** + * Deserialize the instance. 
+ * @param ois stream from which the object should be read + * @throws ClassNotFoundException if a class in the stream cannot be found + * @throws IOException if object cannot be read from the stream + */ + private void readObject(ObjectInputStream ois) + throws ClassNotFoundException, IOException { + ois.defaultReadObject(); + MatrixUtils.deserializeRealVector(this, "coefficients", ois); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/LinearOptimizer.java b/src/main/java/org/apache/commons/math3/optim/linear/LinearOptimizer.java new file mode 100644 index 0000000..7e80687 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/LinearOptimizer.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.linear; + +import java.util.Collection; +import java.util.Collections; +import org.apache.commons.math3.exception.TooManyIterationsException; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.nonlinear.scalar.MultivariateOptimizer; + +/** + * Base class for implementing linear optimizers. 
+ * + * @since 3.1 + */ +public abstract class LinearOptimizer + extends MultivariateOptimizer { + /** + * Linear objective function. + */ + private LinearObjectiveFunction function; + /** + * Linear constraints. + */ + private Collection<LinearConstraint> linearConstraints; + /** + * Whether to restrict the variables to non-negative values. + */ + private boolean nonNegative; + + /** + * Simple constructor with default settings. + * + */ + protected LinearOptimizer() { + super(null); // No convergence checker. + } + + /** + * @return {@code true} if the variables are restricted to non-negative values. + */ + protected boolean isRestrictedToNonNegative() { + return nonNegative; + } + + /** + * @return the optimization type. + */ + protected LinearObjectiveFunction getFunction() { + return function; + } + + /** + * @return the optimization type. + */ + protected Collection<LinearConstraint> getConstraints() { + return Collections.unmodifiableCollection(linearConstraints); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link MultivariateOptimizer#parseOptimizationData(OptimizationData[]) + * MultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link LinearObjectiveFunction}</li> + * <li>{@link LinearConstraintSet}</li> + * <li>{@link NonNegativeConstraint}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyIterationsException if the maximal number of + * iterations is exceeded. + */ + @Override + public PointValuePair optimize(OptimizationData... optData) + throws TooManyIterationsException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. 
+ * The following data will be looked for: + * <ul> + * <li>{@link LinearObjectiveFunction}</li> + * <li>{@link LinearConstraintSet}</li> + * <li>{@link NonNegativeConstraint}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof LinearObjectiveFunction) { + function = (LinearObjectiveFunction) data; + continue; + } + if (data instanceof LinearConstraintSet) { + linearConstraints = ((LinearConstraintSet) data).getConstraints(); + continue; + } + if (data instanceof NonNegativeConstraint) { + nonNegative = ((NonNegativeConstraint) data).isRestrictedToNonNegative(); + continue; + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/NoFeasibleSolutionException.java b/src/main/java/org/apache/commons/math3/optim/linear/NoFeasibleSolutionException.java new file mode 100644 index 0000000..cbe8321 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/NoFeasibleSolutionException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Exception thrown by optimizers when no solution fulfills the constraints.
 * It always carries the {@link LocalizedFormats#NO_FEASIBLE_SOLUTION} message.
 *
 * @since 2.0
 */
public class NoFeasibleSolutionException extends MathIllegalStateException {
    /** Serializable version identifier. */
    private static final long serialVersionUID = -3044253632189082760L;

    /**
     * Simple constructor using a default message.
     */
    public NoFeasibleSolutionException() {
        super(LocalizedFormats.NO_FEASIBLE_SOLUTION);
    }
}
+ */ +package org.apache.commons.math3.optim.linear; + +import org.apache.commons.math3.optim.OptimizationData; + +/** + * A constraint for a linear optimization problem indicating whether all + * variables must be restricted to non-negative values. + * + * @since 3.1 + */ +public class NonNegativeConstraint implements OptimizationData { + /** Whether the variables are all positive. */ + private final boolean isRestricted; + + /** + * @param restricted If {@code true}, all the variables must be positive. + */ + public NonNegativeConstraint(boolean restricted) { + isRestricted = restricted; + } + + /** + * Indicates whether all the variables must be restricted to non-negative + * values. + * + * @return {@code true} if all the variables must be positive. + */ + public boolean isRestrictedToNonNegative() { + return isRestricted; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/PivotSelectionRule.java b/src/main/java/org/apache/commons/math3/optim/linear/PivotSelectionRule.java new file mode 100644 index 0000000..a2a2765 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/PivotSelectionRule.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Pivot selection rule to use for a Simplex solver.
 * The rule is passed to the solver as {@link OptimizationData}.
 *
 * @since 3.3
 */
public enum PivotSelectionRule implements OptimizationData {
    /**
     * The classical rule: the variable with the most negative coefficient
     * in the objective function row will be chosen as entering variable.
     */
    DANTZIG,
    /**
     * The first variable with a negative coefficient in the objective function
     * row will be chosen as entering variable. This rule guarantees to prevent
     * cycles, but may take longer to find an optimal solution.
     */
    BLAND
}
/**
 * Kinds of relationship between the two sides of a {@link LinearConstraint}.
 *
 * @since 2.0
 */
public enum Relationship {
    /** Equality relationship. */
    EQ("="),
    /** Less than or equal relationship. */
    LEQ("<="),
    /** Greater than or equal relationship. */
    GEQ(">=");

    /** Text used when displaying the relationship. */
    private final String symbol;

    /**
     * Simple constructor.
     *
     * @param stringValue Display string for the relationship.
     */
    Relationship(String stringValue) {
        symbol = stringValue;
    }

    /** {@inheritDoc} */
    @Override
    public String toString() {
        return symbol;
    }

    /**
     * Gets the relationship obtained when multiplying all coefficients by -1.
     *
     * @return the opposite relationship.
     */
    public Relationship oppositeRelationship() {
        // Inequalities are flipped, the equality is its own opposite.
        if (this == LEQ) {
            return GEQ;
        }
        if (this == GEQ) {
            return LEQ;
        }
        return EQ;
    }
}
+ */ +package org.apache.commons.math3.optim.linear; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.math3.exception.TooManyIterationsException; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.Precision; + +/** + * Solves a linear problem using the "Two-Phase Simplex" method. + * <p> + * The {@link SimplexSolver} supports the following {@link OptimizationData} data provided + * as arguments to {@link #optimize(OptimizationData...)}: + * <ul> + * <li>objective function: {@link LinearObjectiveFunction} - mandatory</li> + * <li>linear constraints {@link LinearConstraintSet} - mandatory</li> + * <li>type of optimization: {@link org.apache.commons.math3.optim.nonlinear.scalar.GoalType GoalType} + * - optional, default: {@link org.apache.commons.math3.optim.nonlinear.scalar.GoalType#MINIMIZE MINIMIZE}</li> + * <li>whether to allow negative values as solution: {@link NonNegativeConstraint} - optional, default: true</li> + * <li>pivot selection rule: {@link PivotSelectionRule} - optional, default {@link PivotSelectionRule#DANTZIG}</li> + * <li>callback for the best solution: {@link SolutionCallback} - optional</li> + * <li>maximum number of iterations: {@link org.apache.commons.math3.optim.MaxIter} - optional, default: {@link Integer#MAX_VALUE}</li> + * </ul> + * <p> + * <b>Note:</b> Depending on the problem definition, the default convergence criteria + * may be too strict, resulting in {@link NoFeasibleSolutionException} or + * {@link TooManyIterationsException}. In such a case it is advised to adjust these + * criteria with more appropriate values, e.g. relaxing the epsilon value. 
+ * <p> + * Default convergence criteria: + * <ul> + * <li>Algorithm convergence: 1e-6</li> + * <li>Floating-point comparisons: 10 ulp</li> + * <li>Cut-Off value: 1e-10</li> + * </ul> + * <p> + * The cut-off value has been introduced to handle the case of very small pivot elements + * in the Simplex tableau, as these may lead to numerical instabilities and degeneracy. + * Potential pivot elements smaller than this value will be treated as if they were zero + * and are thus not considered by the pivot selection mechanism. The default value is safe + * for many problems, but may need to be adjusted in case of very small coefficients + * used in either the {@link LinearConstraint} or {@link LinearObjectiveFunction}. + * + * @since 2.0 + */ +public class SimplexSolver extends LinearOptimizer { + /** Default amount of error to accept in floating point comparisons (as ulps). */ + static final int DEFAULT_ULPS = 10; + + /** Default cut-off value. */ + static final double DEFAULT_CUT_OFF = 1e-10; + + /** Default amount of error to accept for algorithm convergence. */ + private static final double DEFAULT_EPSILON = 1.0e-6; + + /** Amount of error to accept for algorithm convergence. */ + private final double epsilon; + + /** Amount of error to accept in floating point comparisons (as ulps). */ + private final int maxUlps; + + /** + * Cut-off value for entries in the tableau: values smaller than the cut-off + * are treated as zero to improve numerical stability. + */ + private final double cutOff; + + /** The pivot selection method to use. */ + private PivotSelectionRule pivotSelection; + + /** + * The solution callback to access the best solution found so far in case + * the optimizer fails to find an optimal solution within the iteration limits. + */ + private SolutionCallback solutionCallback; + + /** + * Builds a simplex solver with default settings. 
+ */ + public SimplexSolver() { + this(DEFAULT_EPSILON, DEFAULT_ULPS, DEFAULT_CUT_OFF); + } + + /** + * Builds a simplex solver with a specified accepted amount of error. + * + * @param epsilon Amount of error to accept for algorithm convergence. + */ + public SimplexSolver(final double epsilon) { + this(epsilon, DEFAULT_ULPS, DEFAULT_CUT_OFF); + } + + /** + * Builds a simplex solver with a specified accepted amount of error. + * + * @param epsilon Amount of error to accept for algorithm convergence. + * @param maxUlps Amount of error to accept in floating point comparisons. + */ + public SimplexSolver(final double epsilon, final int maxUlps) { + this(epsilon, maxUlps, DEFAULT_CUT_OFF); + } + + /** + * Builds a simplex solver with a specified accepted amount of error. + * + * @param epsilon Amount of error to accept for algorithm convergence. + * @param maxUlps Amount of error to accept in floating point comparisons. + * @param cutOff Values smaller than the cutOff are treated as zero. + */ + public SimplexSolver(final double epsilon, final int maxUlps, final double cutOff) { + this.epsilon = epsilon; + this.maxUlps = maxUlps; + this.cutOff = cutOff; + this.pivotSelection = PivotSelectionRule.DANTZIG; + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link LinearOptimizer#optimize(OptimizationData...) + * LinearOptimizer}, this method will register the following data: + * <ul> + * <li>{@link SolutionCallback}</li> + * <li>{@link PivotSelectionRule}</li> + * </ul> + * + * @return {@inheritDoc} + * @throws TooManyIterationsException if the maximal number of iterations is exceeded. + */ + @Override + public PointValuePair optimize(OptimizationData... optData) + throws TooManyIterationsException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. 
+ * In addition to those documented in + * {@link LinearOptimizer#parseOptimizationData(OptimizationData[]) + * LinearOptimizer}, this method will register the following data: + * <ul> + * <li>{@link SolutionCallback}</li> + * <li>{@link PivotSelectionRule}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // reset the callback before parsing + solutionCallback = null; + + for (OptimizationData data : optData) { + if (data instanceof SolutionCallback) { + solutionCallback = (SolutionCallback) data; + continue; + } + if (data instanceof PivotSelectionRule) { + pivotSelection = (PivotSelectionRule) data; + continue; + } + } + } + + /** + * Returns the column with the most negative coefficient in the objective function row. + * + * @param tableau Simple tableau for the problem. + * @return the column with the most negative coefficient. + */ + private Integer getPivotColumn(SimplexTableau tableau) { + double minValue = 0; + Integer minPos = null; + for (int i = tableau.getNumObjectiveFunctions(); i < tableau.getWidth() - 1; i++) { + final double entry = tableau.getEntry(0, i); + // check if the entry is strictly smaller than the current minimum + // do not use a ulp/epsilon check + if (entry < minValue) { + minValue = entry; + minPos = i; + + // Bland's rule: chose the entering column with the lowest index + if (pivotSelection == PivotSelectionRule.BLAND && isValidPivotColumn(tableau, i)) { + break; + } + } + } + return minPos; + } + + /** + * Checks whether the given column is valid pivot column, i.e. will result + * in a valid pivot row. + * <p> + * When applying Bland's rule to select the pivot column, it may happen that + * there is no corresponding pivot row. This method will check if the selected + * pivot column will return a valid pivot row. 
+ * + * @param tableau simplex tableau for the problem + * @param col the column to test + * @return {@code true} if the pivot column is valid, {@code false} otherwise + */ + private boolean isValidPivotColumn(SimplexTableau tableau, int col) { + for (int i = tableau.getNumObjectiveFunctions(); i < tableau.getHeight(); i++) { + final double entry = tableau.getEntry(i, col); + + // do the same check as in getPivotRow + if (Precision.compareTo(entry, 0d, cutOff) > 0) { + return true; + } + } + return false; + } + + /** + * Returns the row with the minimum ratio as given by the minimum ratio test (MRT). + * + * @param tableau Simplex tableau for the problem. + * @param col Column to test the ratio of (see {@link #getPivotColumn(SimplexTableau)}). + * @return the row with the minimum ratio. + */ + private Integer getPivotRow(SimplexTableau tableau, final int col) { + // create a list of all the rows that tie for the lowest score in the minimum ratio test + List<Integer> minRatioPositions = new ArrayList<Integer>(); + double minRatio = Double.MAX_VALUE; + for (int i = tableau.getNumObjectiveFunctions(); i < tableau.getHeight(); i++) { + final double rhs = tableau.getEntry(i, tableau.getWidth() - 1); + final double entry = tableau.getEntry(i, col); + + // only consider pivot elements larger than the cutOff threshold + // selecting others may lead to degeneracy or numerical instabilities + if (Precision.compareTo(entry, 0d, cutOff) > 0) { + final double ratio = FastMath.abs(rhs / entry); + // check if the entry is strictly equal to the current min ratio + // do not use a ulp/epsilon check + final int cmp = Double.compare(ratio, minRatio); + if (cmp == 0) { + minRatioPositions.add(i); + } else if (cmp < 0) { + minRatio = ratio; + minRatioPositions.clear(); + minRatioPositions.add(i); + } + } + } + + if (minRatioPositions.size() == 0) { + return null; + } else if (minRatioPositions.size() > 1) { + // there's a degeneracy as indicated by a tie in the minimum ratio test + + 
// 1. check if there's an artificial variable that can be forced out of the basis + if (tableau.getNumArtificialVariables() > 0) { + for (Integer row : minRatioPositions) { + for (int i = 0; i < tableau.getNumArtificialVariables(); i++) { + int column = i + tableau.getArtificialVariableOffset(); + final double entry = tableau.getEntry(row, column); + if (Precision.equals(entry, 1d, maxUlps) && row.equals(tableau.getBasicRow(column))) { + return row; + } + } + } + } + + // 2. apply Bland's rule to prevent cycling: + // take the row for which the corresponding basic variable has the smallest index + // + // see http://www.stanford.edu/class/msande310/blandrule.pdf + // see http://en.wikipedia.org/wiki/Bland%27s_rule (not equivalent to the above paper) + + Integer minRow = null; + int minIndex = tableau.getWidth(); + for (Integer row : minRatioPositions) { + final int basicVar = tableau.getBasicVariable(row); + if (basicVar < minIndex) { + minIndex = basicVar; + minRow = row; + } + } + return minRow; + } + return minRatioPositions.get(0); + } + + /** + * Runs one iteration of the Simplex method on the given model. + * + * @param tableau Simple tableau for the problem. + * @throws TooManyIterationsException if the allowed number of iterations has been exhausted. + * @throws UnboundedSolutionException if the model is found not to have a bounded solution. + */ + protected void doIteration(final SimplexTableau tableau) + throws TooManyIterationsException, + UnboundedSolutionException { + + incrementIterationCount(); + + Integer pivotCol = getPivotColumn(tableau); + Integer pivotRow = getPivotRow(tableau, pivotCol); + if (pivotRow == null) { + throw new UnboundedSolutionException(); + } + + tableau.performRowOperations(pivotCol, pivotRow); + } + + /** + * Solves Phase 1 of the Simplex method. + * + * @param tableau Simple tableau for the problem. + * @throws TooManyIterationsException if the allowed number of iterations has been exhausted. 
+ * @throws UnboundedSolutionException if the model is found not to have a bounded solution. + * @throws NoFeasibleSolutionException if there is no feasible solution? + */ + protected void solvePhase1(final SimplexTableau tableau) + throws TooManyIterationsException, + UnboundedSolutionException, + NoFeasibleSolutionException { + + // make sure we're in Phase 1 + if (tableau.getNumArtificialVariables() == 0) { + return; + } + + while (!tableau.isOptimal()) { + doIteration(tableau); + } + + // if W is not zero then we have no feasible solution + if (!Precision.equals(tableau.getEntry(0, tableau.getRhsOffset()), 0d, epsilon)) { + throw new NoFeasibleSolutionException(); + } + } + + /** {@inheritDoc} */ + @Override + public PointValuePair doOptimize() + throws TooManyIterationsException, + UnboundedSolutionException, + NoFeasibleSolutionException { + + // reset the tableau to indicate a non-feasible solution in case + // we do not pass phase 1 successfully + if (solutionCallback != null) { + solutionCallback.setTableau(null); + } + + final SimplexTableau tableau = + new SimplexTableau(getFunction(), + getConstraints(), + getGoalType(), + isRestrictedToNonNegative(), + epsilon, + maxUlps); + + solvePhase1(tableau); + tableau.dropPhase1Objective(); + + // after phase 1, we are sure to have a feasible solution + if (solutionCallback != null) { + solutionCallback.setTableau(tableau); + } + + while (!tableau.isOptimal()) { + doIteration(tableau); + } + + // check that the solution respects the nonNegative restriction in case + // the epsilon/cutOff values are too large for the actual linear problem + // (e.g. with very small constraint coefficients), the solver might actually + // find a non-valid solution (with negative coefficients). 
+ final PointValuePair solution = tableau.getSolution(); + if (isRestrictedToNonNegative()) { + final double[] coeff = solution.getPoint(); + for (int i = 0; i < coeff.length; i++) { + if (Precision.compareTo(coeff[i], 0, epsilon) < 0) { + throw new NoFeasibleSolutionException(); + } + } + } + return solution; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/SimplexTableau.java b/src/main/java/org/apache/commons/math3/optim/linear/SimplexTableau.java new file mode 100644 index 0000000..31e71d2 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/SimplexTableau.java @@ -0,0 +1,713 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.linear; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.util.Precision; + +/** + * A tableau for use in the Simplex method. + * + * <p> + * Example: + * <pre> + * W | Z | x1 | x2 | x- | s1 | s2 | a1 | RHS + * --------------------------------------------------- + * -1 0 0 0 0 0 0 1 0 <= phase 1 objective + * 0 1 -15 -10 0 0 0 0 0 <= phase 2 objective + * 0 0 1 0 0 1 0 0 2 <= constraint 1 + * 0 0 0 1 0 0 1 0 3 <= constraint 2 + * 0 0 1 1 0 0 0 1 4 <= constraint 3 + * </pre> + * W: Phase 1 objective function</br> + * Z: Phase 2 objective function</br> + * x1 & x2: Decision variables</br> + * x-: Extra decision variable to allow for negative values</br> + * s1 & s2: Slack/Surplus variables</br> + * a1: Artificial variable</br> + * RHS: Right hand side</br> + * </p> + * @since 2.0 + */ +class SimplexTableau implements Serializable { + + /** Column label for negative vars. */ + private static final String NEGATIVE_VAR_COLUMN_LABEL = "x-"; + + /** Serializable version identifier. */ + private static final long serialVersionUID = -1369660067587938365L; + + /** Linear objective function. */ + private final LinearObjectiveFunction f; + + /** Linear constraints. */ + private final List<LinearConstraint> constraints; + + /** Whether to restrict the variables to non-negative values. 
*/ + private final boolean restrictToNonNegative; + + /** The variables each column represents */ + private final List<String> columnLabels = new ArrayList<String>(); + + /** Simple tableau. */ + private transient Array2DRowRealMatrix tableau; + + /** Number of decision variables. */ + private final int numDecisionVariables; + + /** Number of slack variables. */ + private final int numSlackVariables; + + /** Number of artificial variables. */ + private int numArtificialVariables; + + /** Amount of error to accept when checking for optimality. */ + private final double epsilon; + + /** Amount of error to accept in floating point comparisons. */ + private final int maxUlps; + + /** Maps basic variables to row they are basic in. */ + private int[] basicVariables; + + /** Maps rows to their corresponding basic variables. */ + private int[] basicRows; + + /** + * Builds a tableau for a linear problem. + * + * @param f Linear objective function. + * @param constraints Linear constraints. + * @param goalType Optimization goal: either {@link GoalType#MAXIMIZE} + * or {@link GoalType#MINIMIZE}. + * @param restrictToNonNegative Whether to restrict the variables to non-negative values. + * @param epsilon Amount of error to accept when checking for optimality. + */ + SimplexTableau(final LinearObjectiveFunction f, + final Collection<LinearConstraint> constraints, + final GoalType goalType, + final boolean restrictToNonNegative, + final double epsilon) { + this(f, constraints, goalType, restrictToNonNegative, epsilon, SimplexSolver.DEFAULT_ULPS); + } + + /** + * Build a tableau for a linear problem. 
+ * @param f linear objective function + * @param constraints linear constraints + * @param goalType type of optimization goal: either {@link GoalType#MAXIMIZE} or {@link GoalType#MINIMIZE} + * @param restrictToNonNegative whether to restrict the variables to non-negative values + * @param epsilon amount of error to accept when checking for optimality + * @param maxUlps amount of error to accept in floating point comparisons + */ + SimplexTableau(final LinearObjectiveFunction f, + final Collection<LinearConstraint> constraints, + final GoalType goalType, + final boolean restrictToNonNegative, + final double epsilon, + final int maxUlps) { + this.f = f; + this.constraints = normalizeConstraints(constraints); + this.restrictToNonNegative = restrictToNonNegative; + this.epsilon = epsilon; + this.maxUlps = maxUlps; + this.numDecisionVariables = f.getCoefficients().getDimension() + (restrictToNonNegative ? 0 : 1); + this.numSlackVariables = getConstraintTypeCounts(Relationship.LEQ) + + getConstraintTypeCounts(Relationship.GEQ); + this.numArtificialVariables = getConstraintTypeCounts(Relationship.EQ) + + getConstraintTypeCounts(Relationship.GEQ); + this.tableau = createTableau(goalType == GoalType.MAXIMIZE); + // initialize the basic variables for phase 1: + // we know that only slack or artificial variables can be basic + initializeBasicVariables(getSlackVariableOffset()); + initializeColumnLabels(); + } + + /** + * Initialize the labels for the columns. 
+ */ + protected void initializeColumnLabels() { + if (getNumObjectiveFunctions() == 2) { + columnLabels.add("W"); + } + columnLabels.add("Z"); + for (int i = 0; i < getOriginalNumDecisionVariables(); i++) { + columnLabels.add("x" + i); + } + if (!restrictToNonNegative) { + columnLabels.add(NEGATIVE_VAR_COLUMN_LABEL); + } + for (int i = 0; i < getNumSlackVariables(); i++) { + columnLabels.add("s" + i); + } + for (int i = 0; i < getNumArtificialVariables(); i++) { + columnLabels.add("a" + i); + } + columnLabels.add("RHS"); + } + + /** + * Create the tableau by itself. + * @param maximize if true, goal is to maximize the objective function + * @return created tableau + */ + protected Array2DRowRealMatrix createTableau(final boolean maximize) { + + // create a matrix of the correct size + int width = numDecisionVariables + numSlackVariables + + numArtificialVariables + getNumObjectiveFunctions() + 1; // + 1 is for RHS + int height = constraints.size() + getNumObjectiveFunctions(); + Array2DRowRealMatrix matrix = new Array2DRowRealMatrix(height, width); + + // initialize the objective function rows + if (getNumObjectiveFunctions() == 2) { + matrix.setEntry(0, 0, -1); + } + + int zIndex = (getNumObjectiveFunctions() == 1) ? 0 : 1; + matrix.setEntry(zIndex, zIndex, maximize ? 1 : -1); + RealVector objectiveCoefficients = maximize ? f.getCoefficients().mapMultiply(-1) : f.getCoefficients(); + copyArray(objectiveCoefficients.toArray(), matrix.getDataRef()[zIndex]); + matrix.setEntry(zIndex, width - 1, maximize ? 
f.getConstantTerm() : -1 * f.getConstantTerm()); + + if (!restrictToNonNegative) { + matrix.setEntry(zIndex, getSlackVariableOffset() - 1, + getInvertedCoefficientSum(objectiveCoefficients)); + } + + // initialize the constraint rows + int slackVar = 0; + int artificialVar = 0; + for (int i = 0; i < constraints.size(); i++) { + LinearConstraint constraint = constraints.get(i); + int row = getNumObjectiveFunctions() + i; + + // decision variable coefficients + copyArray(constraint.getCoefficients().toArray(), matrix.getDataRef()[row]); + + // x- + if (!restrictToNonNegative) { + matrix.setEntry(row, getSlackVariableOffset() - 1, + getInvertedCoefficientSum(constraint.getCoefficients())); + } + + // RHS + matrix.setEntry(row, width - 1, constraint.getValue()); + + // slack variables + if (constraint.getRelationship() == Relationship.LEQ) { + matrix.setEntry(row, getSlackVariableOffset() + slackVar++, 1); // slack + } else if (constraint.getRelationship() == Relationship.GEQ) { + matrix.setEntry(row, getSlackVariableOffset() + slackVar++, -1); // excess + } + + // artificial variables + if ((constraint.getRelationship() == Relationship.EQ) || + (constraint.getRelationship() == Relationship.GEQ)) { + matrix.setEntry(0, getArtificialVariableOffset() + artificialVar, 1); + matrix.setEntry(row, getArtificialVariableOffset() + artificialVar++, 1); + matrix.setRowVector(0, matrix.getRowVector(0).subtract(matrix.getRowVector(row))); + } + } + + return matrix; + } + + /** + * Get new versions of the constraints which have positive right hand sides. 
+ * @param originalConstraints original (not normalized) constraints + * @return new versions of the constraints + */ + public List<LinearConstraint> normalizeConstraints(Collection<LinearConstraint> originalConstraints) { + List<LinearConstraint> normalized = new ArrayList<LinearConstraint>(originalConstraints.size()); + for (LinearConstraint constraint : originalConstraints) { + normalized.add(normalize(constraint)); + } + return normalized; + } + + /** + * Get a new equation equivalent to this one with a positive right hand side. + * @param constraint reference constraint + * @return new equation + */ + private LinearConstraint normalize(final LinearConstraint constraint) { + if (constraint.getValue() < 0) { + return new LinearConstraint(constraint.getCoefficients().mapMultiply(-1), + constraint.getRelationship().oppositeRelationship(), + -1 * constraint.getValue()); + } + return new LinearConstraint(constraint.getCoefficients(), + constraint.getRelationship(), constraint.getValue()); + } + + /** + * Get the number of objective functions in this tableau. + * @return 2 for Phase 1. 1 for Phase 2. + */ + protected final int getNumObjectiveFunctions() { + return this.numArtificialVariables > 0 ? 2 : 1; + } + + /** + * Get a count of constraints corresponding to a specified relationship. + * @param relationship relationship to count + * @return number of constraint with the specified relationship + */ + private int getConstraintTypeCounts(final Relationship relationship) { + int count = 0; + for (final LinearConstraint constraint : constraints) { + if (constraint.getRelationship() == relationship) { + ++count; + } + } + return count; + } + + /** + * Get the -1 times the sum of all coefficients in the given array. + * @param coefficients coefficients to sum + * @return the -1 times the sum of all coefficients in the given array. 
+ */ + protected static double getInvertedCoefficientSum(final RealVector coefficients) { + double sum = 0; + for (double coefficient : coefficients.toArray()) { + sum -= coefficient; + } + return sum; + } + + /** + * Checks whether the given column is basic. + * @param col index of the column to check + * @return the row that the variable is basic in. null if the column is not basic + */ + protected Integer getBasicRow(final int col) { + final int row = basicVariables[col]; + return row == -1 ? null : row; + } + + /** + * Returns the variable that is basic in this row. + * @param row the index of the row to check + * @return the variable that is basic for this row. + */ + protected int getBasicVariable(final int row) { + return basicRows[row]; + } + + /** + * Initializes the basic variable / row mapping. + * @param startColumn the column to start + */ + private void initializeBasicVariables(final int startColumn) { + basicVariables = new int[getWidth() - 1]; + basicRows = new int[getHeight()]; + + Arrays.fill(basicVariables, -1); + + for (int i = startColumn; i < getWidth() - 1; i++) { + Integer row = findBasicRow(i); + if (row != null) { + basicVariables[i] = row; + basicRows[row] = i; + } + } + } + + /** + * Returns the row in which the given column is basic. + * @param col index of the column + * @return the row that the variable is basic in, or {@code null} if the variable is not basic. + */ + private Integer findBasicRow(final int col) { + Integer row = null; + for (int i = 0; i < getHeight(); i++) { + final double entry = getEntry(i, col); + if (Precision.equals(entry, 1d, maxUlps) && (row == null)) { + row = i; + } else if (!Precision.equals(entry, 0d, maxUlps)) { + return null; + } + } + return row; + } + + /** + * Removes the phase 1 objective function, positive cost non-artificial variables, + * and the non-basic artificial variables from this tableau. 
+ */ + protected void dropPhase1Objective() { + if (getNumObjectiveFunctions() == 1) { + return; + } + + final Set<Integer> columnsToDrop = new TreeSet<Integer>(); + columnsToDrop.add(0); + + // positive cost non-artificial variables + for (int i = getNumObjectiveFunctions(); i < getArtificialVariableOffset(); i++) { + final double entry = getEntry(0, i); + if (Precision.compareTo(entry, 0d, epsilon) > 0) { + columnsToDrop.add(i); + } + } + + // non-basic artificial variables + for (int i = 0; i < getNumArtificialVariables(); i++) { + int col = i + getArtificialVariableOffset(); + if (getBasicRow(col) == null) { + columnsToDrop.add(col); + } + } + + final double[][] matrix = new double[getHeight() - 1][getWidth() - columnsToDrop.size()]; + for (int i = 1; i < getHeight(); i++) { + int col = 0; + for (int j = 0; j < getWidth(); j++) { + if (!columnsToDrop.contains(j)) { + matrix[i - 1][col++] = getEntry(i, j); + } + } + } + + // remove the columns in reverse order so the indices are correct + Integer[] drop = columnsToDrop.toArray(new Integer[columnsToDrop.size()]); + for (int i = drop.length - 1; i >= 0; i--) { + columnLabels.remove((int) drop[i]); + } + + this.tableau = new Array2DRowRealMatrix(matrix); + this.numArtificialVariables = 0; + // need to update the basic variable mappings as row/columns have been dropped + initializeBasicVariables(getNumObjectiveFunctions()); + } + + /** + * @param src the source array + * @param dest the destination array + */ + private void copyArray(final double[] src, final double[] dest) { + System.arraycopy(src, 0, dest, getNumObjectiveFunctions(), src.length); + } + + /** + * Returns whether the problem is at an optimal state. 
+ * @return whether the model has been solved + */ + boolean isOptimal() { + final double[] objectiveFunctionRow = getRow(0); + final int end = getRhsOffset(); + for (int i = getNumObjectiveFunctions(); i < end; i++) { + final double entry = objectiveFunctionRow[i]; + if (Precision.compareTo(entry, 0d, epsilon) < 0) { + return false; + } + } + return true; + } + + /** + * Get the current solution. + * @return current solution + */ + protected PointValuePair getSolution() { + int negativeVarColumn = columnLabels.indexOf(NEGATIVE_VAR_COLUMN_LABEL); + Integer negativeVarBasicRow = negativeVarColumn > 0 ? getBasicRow(negativeVarColumn) : null; + double mostNegative = negativeVarBasicRow == null ? 0 : getEntry(negativeVarBasicRow, getRhsOffset()); + + final Set<Integer> usedBasicRows = new HashSet<Integer>(); + final double[] coefficients = new double[getOriginalNumDecisionVariables()]; + for (int i = 0; i < coefficients.length; i++) { + int colIndex = columnLabels.indexOf("x" + i); + if (colIndex < 0) { + coefficients[i] = 0; + continue; + } + Integer basicRow = getBasicRow(colIndex); + if (basicRow != null && basicRow == 0) { + // if the basic row is found to be the objective function row + // set the coefficient to 0 -> this case handles unconstrained + // variables that are still part of the objective function + coefficients[i] = 0; + } else if (usedBasicRows.contains(basicRow)) { + // if multiple variables can take a given value + // then we choose the first and set the rest equal to 0 + coefficients[i] = 0 - (restrictToNonNegative ? 0 : mostNegative); + } else { + usedBasicRows.add(basicRow); + coefficients[i] = + (basicRow == null ? 0 : getEntry(basicRow, getRhsOffset())) - + (restrictToNonNegative ? 0 : mostNegative); + } + } + return new PointValuePair(coefficients, f.value(coefficients)); + } + + /** + * Perform the row operations of the simplex algorithm with the selected + * pivot column and row. 
+ * @param pivotCol the pivot column + * @param pivotRow the pivot row + */ + protected void performRowOperations(int pivotCol, int pivotRow) { + // set the pivot element to 1 + final double pivotVal = getEntry(pivotRow, pivotCol); + divideRow(pivotRow, pivotVal); + + // set the rest of the pivot column to 0 + for (int i = 0; i < getHeight(); i++) { + if (i != pivotRow) { + final double multiplier = getEntry(i, pivotCol); + if (multiplier != 0.0) { + subtractRow(i, pivotRow, multiplier); + } + } + } + + // update the basic variable mappings + final int previousBasicVariable = getBasicVariable(pivotRow); + basicVariables[previousBasicVariable] = -1; + basicVariables[pivotCol] = pivotRow; + basicRows[pivotRow] = pivotCol; + } + + /** + * Divides one row by a given divisor. + * <p> + * After application of this operation, the following will hold: + * <pre>dividendRow = dividendRow / divisor</pre> + * + * @param dividendRowIndex index of the row + * @param divisor value of the divisor + */ + protected void divideRow(final int dividendRowIndex, final double divisor) { + final double[] dividendRow = getRow(dividendRowIndex); + for (int j = 0; j < getWidth(); j++) { + dividendRow[j] /= divisor; + } + } + + /** + * Subtracts a multiple of one row from another. + * <p> + * After application of this operation, the following will hold: + * <pre>minuendRow = minuendRow - multiple * subtrahendRow</pre> + * + * @param minuendRowIndex row index + * @param subtrahendRowIndex row index + * @param multiplier multiplication factor + */ + protected void subtractRow(final int minuendRowIndex, final int subtrahendRowIndex, final double multiplier) { + final double[] minuendRow = getRow(minuendRowIndex); + final double[] subtrahendRow = getRow(subtrahendRowIndex); + for (int i = 0; i < getWidth(); i++) { + minuendRow[i] -= subtrahendRow[i] * multiplier; + } + } + + /** + * Get the width of the tableau. 
+ * @return width of the tableau + */ + protected final int getWidth() { + return tableau.getColumnDimension(); + } + + /** + * Get the height of the tableau. + * @return height of the tableau + */ + protected final int getHeight() { + return tableau.getRowDimension(); + } + + /** + * Get an entry of the tableau. + * @param row row index + * @param column column index + * @return entry at (row, column) + */ + protected final double getEntry(final int row, final int column) { + return tableau.getEntry(row, column); + } + + /** + * Set an entry of the tableau. + * @param row row index + * @param column column index + * @param value for the entry + */ + protected final void setEntry(final int row, final int column, final double value) { + tableau.setEntry(row, column, value); + } + + /** + * Get the offset of the first slack variable. + * @return offset of the first slack variable + */ + protected final int getSlackVariableOffset() { + return getNumObjectiveFunctions() + numDecisionVariables; + } + + /** + * Get the offset of the first artificial variable. + * @return offset of the first artificial variable + */ + protected final int getArtificialVariableOffset() { + return getNumObjectiveFunctions() + numDecisionVariables + numSlackVariables; + } + + /** + * Get the offset of the right hand side. + * @return offset of the right hand side + */ + protected final int getRhsOffset() { + return getWidth() - 1; + } + + /** + * Get the number of decision variables. + * <p> + * If variables are not restricted to positive values, this will include 1 extra decision variable to represent + * the absolute value of the most negative variable. + * + * @return number of decision variables + * @see #getOriginalNumDecisionVariables() + */ + protected final int getNumDecisionVariables() { + return numDecisionVariables; + } + + /** + * Get the original number of decision variables. 
+ * @return original number of decision variables + * @see #getNumDecisionVariables() + */ + protected final int getOriginalNumDecisionVariables() { + return f.getCoefficients().getDimension(); + } + + /** + * Get the number of slack variables. + * @return number of slack variables + */ + protected final int getNumSlackVariables() { + return numSlackVariables; + } + + /** + * Get the number of artificial variables. + * @return number of artificial variables + */ + protected final int getNumArtificialVariables() { + return numArtificialVariables; + } + + /** + * Get the row from the tableau. + * @param row the row index + * @return the reference to the underlying row data + */ + protected final double[] getRow(int row) { + return tableau.getDataRef()[row]; + } + + /** + * Get the tableau data. + * @return tableau data + */ + protected final double[][] getData() { + return tableau.getData(); + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other instanceof SimplexTableau) { + SimplexTableau rhs = (SimplexTableau) other; + return (restrictToNonNegative == rhs.restrictToNonNegative) && + (numDecisionVariables == rhs.numDecisionVariables) && + (numSlackVariables == rhs.numSlackVariables) && + (numArtificialVariables == rhs.numArtificialVariables) && + (epsilon == rhs.epsilon) && + (maxUlps == rhs.maxUlps) && + f.equals(rhs.f) && + constraints.equals(rhs.constraints) && + tableau.equals(rhs.tableau); + } + return false; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + return Boolean.valueOf(restrictToNonNegative).hashCode() ^ + numDecisionVariables ^ + numSlackVariables ^ + numArtificialVariables ^ + Double.valueOf(epsilon).hashCode() ^ + maxUlps ^ + f.hashCode() ^ + constraints.hashCode() ^ + tableau.hashCode(); + } + + /** + * Serialize the instance. 
+ * @param oos stream where object should be written + * @throws IOException if object cannot be written to stream + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + MatrixUtils.serializeRealMatrix(tableau, oos); + } + + /** + * Deserialize the instance. + * @param ois stream from which the object should be read + * @throws ClassNotFoundException if a class in the stream cannot be found + * @throws IOException if object cannot be read from the stream + */ + private void readObject(ObjectInputStream ois) + throws ClassNotFoundException, IOException { + ois.defaultReadObject(); + MatrixUtils.deserializeRealMatrix(this, "tableau", ois); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/SolutionCallback.java b/src/main/java/org/apache/commons/math3/optim/linear/SolutionCallback.java new file mode 100644 index 0000000..24515cc --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/SolutionCallback.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.linear; + +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointValuePair; + +/** + * A callback object that can be provided to a linear optimizer to keep track + * of the best solution found. + * + * @since 3.3 + */ +public class SolutionCallback implements OptimizationData { + /** The SimplexTableau used by the SimplexSolver. */ + private SimplexTableau tableau; + + /** + * Set the simplex tableau used during the optimization once a feasible + * solution has been found. + * + * @param tableau the simplex tableau containing a feasible solution + */ + void setTableau(final SimplexTableau tableau) { + this.tableau = tableau; + } + + /** + * Retrieve the best solution found so far. + * <p> + * <b>Note:</b> the returned solution may not be optimal, e.g. in case + * the optimizer did reach the iteration limits. + * + * @return the best solution found so far by the optimizer, or {@code null} if + * no feasible solution could be found + */ + public PointValuePair getSolution() { + return tableau != null ? tableau.getSolution() : null; + } + + /** + * Returns if the found solution is optimal. + * @return {@code true} if the solution is optimal, {@code false} otherwise + */ + public boolean isSolutionOptimal() { + return tableau != null ? tableau.isOptimal() : false; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/UnboundedSolutionException.java b/src/main/java/org/apache/commons/math3/optim/linear/UnboundedSolutionException.java new file mode 100644 index 0000000..546cdd2 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/UnboundedSolutionException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.linear; + +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.util.LocalizedFormats; + +/** + * This class represents exceptions thrown by optimizers when a solution escapes to infinity. + * + * @since 2.0 + */ +public class UnboundedSolutionException extends MathIllegalStateException { + /** Serializable version identifier. */ + private static final long serialVersionUID = 940539497277290619L; + + /** + * Simple constructor using a default message. + */ + public UnboundedSolutionException() { + super(LocalizedFormats.UNBOUNDED_SOLUTION); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/linear/package-info.java b/src/main/java/org/apache/commons/math3/optim/linear/package-info.java new file mode 100644 index 0000000..b900589 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/linear/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Optimization algorithms for linear constrained problems. + */ +package org.apache.commons.math3.optim.linear; diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GoalType.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GoalType.java new file mode 100644 index 0000000..c0457b4 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GoalType.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Goal type for an optimization problem (minimization or maximization of + * a scalar function. 
+ * + * @since 2.0 + */ +public enum GoalType implements OptimizationData { + /** Maximization. */ + MAXIMIZE, + /** Minimization. */ + MINIMIZE +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GradientMultivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GradientMultivariateOptimizer.java new file mode 100644 index 0000000..38a8bf7 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/GradientMultivariateOptimizer.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.exception.TooManyEvaluationsException; + +/** + * Base class for implementing optimizers for multivariate scalar + * differentiable functions. + * It contains boiler-plate code for dealing with gradient evaluation. 
+ * + * @since 3.1 + */ +public abstract class GradientMultivariateOptimizer + extends MultivariateOptimizer { + /** + * Gradient of the objective function. + */ + private MultivariateVectorFunction gradient; + + /** + * @param checker Convergence checker. + */ + protected GradientMultivariateOptimizer(ConvergenceChecker<PointValuePair> checker) { + super(checker); + } + + /** + * Compute the gradient vector. + * + * @param params Point at which the gradient must be evaluated. + * @return the gradient at the specified point. + */ + protected double[] computeObjectiveGradient(final double[] params) { + return gradient.value(params); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link MultivariateOptimizer#parseOptimizationData(OptimizationData[]) + * MultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link ObjectiveFunctionGradient}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations (of the objective function) is exceeded. + */ + @Override + public PointValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. + * The following data will be looked for: + * <ul> + * <li>{@link ObjectiveFunctionGradient}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. 
+ for (OptimizationData data : optData) { + if (data instanceof ObjectiveFunctionGradient) { + gradient = ((ObjectiveFunctionGradient) data).getObjectiveFunctionGradient(); + // If more data must be parsed, this statement _must_ be + // changed to "continue". + break; + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LeastSquaresConverter.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LeastSquaresConverter.java new file mode 100644 index 0000000..4be1f12 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LeastSquaresConverter.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.linear.RealMatrix; + +/** + * This class converts + * {@link MultivariateVectorFunction vectorial objective functions} to + * {@link MultivariateFunction scalar objective functions} + * when the goal is to minimize them. 
+ * <br/> + * This class is mostly used when the vectorial objective function represents + * a theoretical result computed from a point set applied to a model and + * the models point must be adjusted to fit the theoretical result to some + * reference observations. The observations may be obtained for example from + * physical measurements whether the model is built from theoretical + * considerations. + * <br/> + * This class computes a possibly weighted squared sum of the residuals, which is + * a scalar value. The residuals are the difference between the theoretical model + * (i.e. the output of the vectorial objective function) and the observations. The + * class implements the {@link MultivariateFunction} interface and can therefore be + * minimized by any optimizer supporting scalar objectives functions.This is one way + * to perform a least square estimation. There are other ways to do this without using + * this converter, as some optimization algorithms directly support vectorial objective + * functions. + * <br/> + * This class support combination of residuals with or without weights and correlations. + * + * @see MultivariateFunction + * @see MultivariateVectorFunction + * @since 2.0 + */ + +public class LeastSquaresConverter implements MultivariateFunction { + /** Underlying vectorial function. */ + private final MultivariateVectorFunction function; + /** Observations to be compared to objective function to compute residuals. */ + private final double[] observations; + /** Optional weights for the residuals. */ + private final double[] weights; + /** Optional scaling matrix (weight and correlations) for the residuals. */ + private final RealMatrix scale; + + /** + * Builds a simple converter for uncorrelated residuals with identical + * weights. 
+ * + * @param function vectorial residuals function to wrap + * @param observations observations to be compared to objective function to compute residuals + */ + public LeastSquaresConverter(final MultivariateVectorFunction function, + final double[] observations) { + this.function = function; + this.observations = observations.clone(); + this.weights = null; + this.scale = null; + } + + /** + * Builds a simple converter for uncorrelated residuals with the + * specified weights. + * <p> + * The scalar objective function value is computed as: + * <pre> + * objective = ∑weight<sub>i</sub>(observation<sub>i</sub>-objective<sub>i</sub>)<sup>2</sup> + * </pre> + * </p> + * <p> + * Weights can be used for example to combine residuals with different standard + * deviations. As an example, consider a residuals array in which even elements + * are angular measurements in degrees with a 0.01° standard deviation and + * odd elements are distance measurements in meters with a 15m standard deviation. + * In this case, the weights array should be initialized with value + * 1.0/(0.01<sup>2</sup>) in the even elements and 1.0/(15.0<sup>2</sup>) in the + * odd elements (i.e. reciprocals of variances). + * </p> + * <p> + * The array computed by the objective function, the observations array and the + * weights array must have consistent sizes or a {@link DimensionMismatchException} + * will be triggered while computing the scalar objective. 
+ * </p> + * + * @param function vectorial residuals function to wrap + * @param observations observations to be compared to objective function to compute residuals + * @param weights weights to apply to the residuals + * @throws DimensionMismatchException if the observations vector and the weights + * vector dimensions do not match (objective function dimension is checked only when + * the {@link #value(double[])} method is called) + */ + public LeastSquaresConverter(final MultivariateVectorFunction function, + final double[] observations, + final double[] weights) { + if (observations.length != weights.length) { + throw new DimensionMismatchException(observations.length, weights.length); + } + this.function = function; + this.observations = observations.clone(); + this.weights = weights.clone(); + this.scale = null; + } + + /** + * Builds a simple converter for correlated residuals with the + * specified weights. + * <p> + * The scalar objective function value is computed as: + * <pre> + * objective = y<sup>T</sup>y with y = scale×(observation-objective) + * </pre> + * </p> + * <p> + * The array computed by the objective function, the observations array and the + * the scaling matrix must have consistent sizes or a {@link DimensionMismatchException} + * will be triggered while computing the scalar objective. 
+ * </p> + * + * @param function vectorial residuals function to wrap + * @param observations observations to be compared to objective function to compute residuals + * @param scale scaling matrix + * @throws DimensionMismatchException if the observations vector and the scale + * matrix dimensions do not match (objective function dimension is checked only when + * the {@link #value(double[])} method is called) + */ + public LeastSquaresConverter(final MultivariateVectorFunction function, + final double[] observations, + final RealMatrix scale) { + if (observations.length != scale.getColumnDimension()) { + throw new DimensionMismatchException(observations.length, scale.getColumnDimension()); + } + this.function = function; + this.observations = observations.clone(); + this.weights = null; + this.scale = scale.copy(); + } + + /** {@inheritDoc} */ + public double value(final double[] point) { + // compute residuals + final double[] residuals = function.value(point); + if (residuals.length != observations.length) { + throw new DimensionMismatchException(residuals.length, observations.length); + } + for (int i = 0; i < residuals.length; ++i) { + residuals[i] -= observations[i]; + } + + // compute sum of squares + double sumSquares = 0; + if (weights != null) { + for (int i = 0; i < residuals.length; ++i) { + final double ri = residuals[i]; + sumSquares += weights[i] * ri * ri; + } + } else if (scale != null) { + for (final double yi : scale.operate(residuals)) { + sumSquares += yi * yi; + } + } else { + for (final double ri : residuals) { + sumSquares += ri * ri; + } + } + + return sumSquares; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LineSearch.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LineSearch.java new file mode 100644 index 0000000..4a630a2 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/LineSearch.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.optim.univariate.UnivariateOptimizer; +import org.apache.commons.math3.optim.univariate.BrentOptimizer; +import org.apache.commons.math3.optim.univariate.BracketFinder; +import org.apache.commons.math3.optim.univariate.UnivariatePointValuePair; +import org.apache.commons.math3.optim.univariate.SimpleUnivariateValueChecker; +import org.apache.commons.math3.optim.univariate.SearchInterval; +import org.apache.commons.math3.optim.univariate.UnivariateObjectiveFunction; +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.optim.MaxEval; + +/** + * Class for finding the minimum of the objective function along a given + * direction. + * + * @since 3.3 + */ +public class LineSearch { + /** + * Value that will pass the precondition check for {@link BrentOptimizer} + * but will not pass the convergence check, so that the custom checker + * will always decide when to stop the line search. 
+ */ + private static final double REL_TOL_UNUSED = 1e-15; + /** + * Value that will pass the precondition check for {@link BrentOptimizer} + * but will not pass the convergence check, so that the custom checker + * will always decide when to stop the line search. + */ + private static final double ABS_TOL_UNUSED = Double.MIN_VALUE; + /** + * Optimizer used for line search. + */ + private final UnivariateOptimizer lineOptimizer; + /** + * Automatic bracketing. + */ + private final BracketFinder bracket = new BracketFinder(); + /** + * Extent of the initial interval used to find an interval that + * brackets the optimum. + */ + private final double initialBracketingRange; + /** + * Optimizer on behalf of which the line search must be performed. + */ + private final MultivariateOptimizer mainOptimizer; + + /** + * The {@code BrentOptimizer} default stopping criterion uses the + * tolerances to check the domain (point) values, not the function + * values. + * The {@code relativeTolerance} and {@code absoluteTolerance} + * arguments are thus passed to a {@link SimpleUnivariateValueChecker + * custom checker} that will use the function values. + * + * @param optimizer Optimizer on behalf of which the line search + * be performed. + * Its {@link MultivariateOptimizer#computeObjectiveValue(double[]) + * computeObjectiveValue} method will be called by the + * {@link #search(double[],double[]) search} method. + * @param relativeTolerance Search will stop when the function relative + * difference between successive iterations is below this value. + * @param absoluteTolerance Search will stop when the function absolute + * difference between successive iterations is below this value. + * @param initialBracketingRange Extent of the initial interval used to + * find an interval that brackets the optimum. 
+ * If the optimized function varies a lot in the vicinity of the optimum, + * it may be necessary to provide a value lower than the distance between + * successive local minima. + */ + public LineSearch(MultivariateOptimizer optimizer, + double relativeTolerance, + double absoluteTolerance, + double initialBracketingRange) { + mainOptimizer = optimizer; + lineOptimizer = new BrentOptimizer(REL_TOL_UNUSED, + ABS_TOL_UNUSED, + new SimpleUnivariateValueChecker(relativeTolerance, + absoluteTolerance)); + this.initialBracketingRange = initialBracketingRange; + } + + /** + * Finds the number {@code alpha} that optimizes + * {@code f(startPoint + alpha * direction)}. + * + * @param startPoint Starting point. + * @param direction Search direction. + * @return the optimum. + * @throws org.apache.commons.math3.exception.TooManyEvaluationsException + * if the number of evaluations is exceeded. + */ + public UnivariatePointValuePair search(final double[] startPoint, + final double[] direction) { + final int n = startPoint.length; + final UnivariateFunction f = new UnivariateFunction() { + /** {@inheritDoc} */ + public double value(double alpha) { + final double[] x = new double[n]; + for (int i = 0; i < n; i++) { + x[i] = startPoint[i] + alpha * direction[i]; + } + final double obj = mainOptimizer.computeObjectiveValue(x); + return obj; + } + }; + + final GoalType goal = mainOptimizer.getGoalType(); + bracket.search(f, goal, 0, initialBracketingRange); + // Passing "MAX_VALUE" as a dummy value because it is the enclosing + // class that counts the number of evaluations (and will eventually + // generate the exception). 
+ return lineOptimizer.optimize(new MaxEval(Integer.MAX_VALUE), + new UnivariateObjectiveFunction(f), + goal, + new SearchInterval(bracket.getLo(), + bracket.getHi(), + bracket.getMid())); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultiStartMultivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultiStartMultivariateOptimizer.java new file mode 100644 index 0000000..86dcd70 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultiStartMultivariateOptimizer.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import java.util.Collections; +import java.util.List; +import java.util.ArrayList; +import java.util.Comparator; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.random.RandomVectorGenerator; +import org.apache.commons.math3.optim.BaseMultiStartMultivariateOptimizer; +import org.apache.commons.math3.optim.PointValuePair; + +/** + * Multi-start optimizer. 
+ * + * This class wraps an optimizer in order to use it several times in + * turn with different starting points (trying to avoid being trapped + * in a local extremum when looking for a global one). + * + * @since 3.0 + */ +public class MultiStartMultivariateOptimizer + extends BaseMultiStartMultivariateOptimizer<PointValuePair> { + /** Underlying optimizer. */ + private final MultivariateOptimizer optimizer; + /** Found optima. */ + private final List<PointValuePair> optima = new ArrayList<PointValuePair>(); + + /** + * Create a multi-start optimizer from a single-start optimizer. + * + * @param optimizer Single-start optimizer to wrap. + * @param starts Number of starts to perform. + * If {@code starts == 1}, the result will be same as if {@code optimizer} + * is called directly. + * @param generator Random vector generator to use for restarts. + * @throws NullArgumentException if {@code optimizer} or {@code generator} + * is {@code null}. + * @throws NotStrictlyPositiveException if {@code starts < 1}. + */ + public MultiStartMultivariateOptimizer(final MultivariateOptimizer optimizer, + final int starts, + final RandomVectorGenerator generator) + throws NullArgumentException, + NotStrictlyPositiveException { + super(optimizer, starts, generator); + this.optimizer = optimizer; + } + + /** + * {@inheritDoc} + */ + @Override + public PointValuePair[] getOptima() { + Collections.sort(optima, getPairComparator()); + return optima.toArray(new PointValuePair[0]); + } + + /** + * {@inheritDoc} + */ + @Override + protected void store(PointValuePair optimum) { + optima.add(optimum); + } + + /** + * {@inheritDoc} + */ + @Override + protected void clear() { + optima.clear(); + } + + /** + * @return a comparator for sorting the optima. 
+ */ + private Comparator<PointValuePair> getPairComparator() { + return new Comparator<PointValuePair>() { + /** {@inheritDoc} */ + public int compare(final PointValuePair o1, + final PointValuePair o2) { + if (o1 == null) { + return (o2 == null) ? 0 : 1; + } else if (o2 == null) { + return -1; + } + final double v1 = o1.getValue(); + final double v2 = o2.getValue(); + return (optimizer.getGoalType() == GoalType.MINIMIZE) ? + Double.compare(v1, v2) : Double.compare(v2, v1); + } + }; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionMappingAdapter.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionMappingAdapter.java new file mode 100644 index 0000000..3c5127c --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionMappingAdapter.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.analysis.function.Logit; +import org.apache.commons.math3.analysis.function.Sigmoid; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * <p>Adapter for mapping bounded {@link MultivariateFunction} to unbounded ones.</p> + * + * <p> + * This adapter can be used to wrap functions subject to simple bounds on + * parameters so they can be used by optimizers that do <em>not</em> directly + * support simple bounds. + * </p> + * <p> + * The principle is that the user function that will be wrapped will see its + * parameters bounded as required, i.e when its {@code value} method is called + * with argument array {@code point}, the elements array will fulfill requirement + * {@code lower[i] <= point[i] <= upper[i]} for all i. Some of the components + * may be unbounded or bounded only on one side if the corresponding bound is + * set to an infinite value. The optimizer will not manage the user function by + * itself, but it will handle this adapter and it is this adapter that will take + * care the bounds are fulfilled. The adapter {@link #value(double[])} method will + * be called by the optimizer with unbound parameters, and the adapter will map + * the unbounded value to the bounded range using appropriate functions like + * {@link Sigmoid} for double bounded elements for example. 
+ * </p> + * <p> + * As the optimizer sees only unbounded parameters, it should be noted that the + * start point or simplex expected by the optimizer should be unbounded, so the + * user is responsible for converting his bounded point to unbounded by calling + * {@link #boundedToUnbounded(double[])} before providing them to the optimizer. + * For the same reason, the point returned by the {@link + * org.apache.commons.math3.optimization.BaseMultivariateOptimizer#optimize(int, + * MultivariateFunction, org.apache.commons.math3.optimization.GoalType, double[])} + * method is unbounded. So to convert this point to bounded, users must call + * {@link #unboundedToBounded(double[])} by themselves!</p> + * <p> + * This adapter is only a poor man solution to simple bounds optimization constraints + * that can be used with simple optimizers like + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.SimplexOptimizer + * SimplexOptimizer}. + * A better solution is to use an optimizer that directly supports simple bounds like + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CMAESOptimizer + * CMAESOptimizer} or + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.BOBYQAOptimizer + * BOBYQAOptimizer}. + * One caveat of this poor-man's solution is that behavior near the bounds may be + * numerically unstable as bounds are mapped from infinite values. + * Another caveat is that convergence values are evaluated by the optimizer with + * respect to unbounded variables, so there will be scales differences when + * converted to bounded variables. + * </p> + * + * @see MultivariateFunctionPenaltyAdapter + * + * @since 3.0 + */ +public class MultivariateFunctionMappingAdapter + implements MultivariateFunction { + /** Underlying bounded function. */ + private final MultivariateFunction bounded; + /** Mapping functions. */ + private final Mapper[] mappers; + + /** Simple constructor. 
+ * @param bounded bounded function + * @param lower lower bounds for each element of the input parameters array + * (some elements may be set to {@code Double.NEGATIVE_INFINITY} for + * unbounded values) + * @param upper upper bounds for each element of the input parameters array + * (some elements may be set to {@code Double.POSITIVE_INFINITY} for + * unbounded values) + * @exception DimensionMismatchException if lower and upper bounds are not + * consistent, either according to dimension or to values + */ + public MultivariateFunctionMappingAdapter(final MultivariateFunction bounded, + final double[] lower, final double[] upper) { + // safety checks + MathUtils.checkNotNull(lower); + MathUtils.checkNotNull(upper); + if (lower.length != upper.length) { + throw new DimensionMismatchException(lower.length, upper.length); + } + for (int i = 0; i < lower.length; ++i) { + // note the following test is written in such a way it also fails for NaN + if (!(upper[i] >= lower[i])) { + throw new NumberIsTooSmallException(upper[i], lower[i], true); + } + } + + this.bounded = bounded; + this.mappers = new Mapper[lower.length]; + for (int i = 0; i < mappers.length; ++i) { + if (Double.isInfinite(lower[i])) { + if (Double.isInfinite(upper[i])) { + // element is unbounded, no transformation is needed + mappers[i] = new NoBoundsMapper(); + } else { + // element is simple-bounded on the upper side + mappers[i] = new UpperBoundMapper(upper[i]); + } + } else { + if (Double.isInfinite(upper[i])) { + // element is simple-bounded on the lower side + mappers[i] = new LowerBoundMapper(lower[i]); + } else { + // element is double-bounded + mappers[i] = new LowerUpperBoundMapper(lower[i], upper[i]); + } + } + } + } + + /** + * Maps an array from unbounded to bounded. + * + * @param point Unbounded values. + * @return the bounded values. + */ + public double[] unboundedToBounded(double[] point) { + // Map unbounded input point to bounded point. 
+ final double[] mapped = new double[mappers.length]; + for (int i = 0; i < mappers.length; ++i) { + mapped[i] = mappers[i].unboundedToBounded(point[i]); + } + + return mapped; + } + + /** + * Maps an array from bounded to unbounded. + * + * @param point Bounded values. + * @return the unbounded values. + */ + public double[] boundedToUnbounded(double[] point) { + // Map bounded input point to unbounded point. + final double[] mapped = new double[mappers.length]; + for (int i = 0; i < mappers.length; ++i) { + mapped[i] = mappers[i].boundedToUnbounded(point[i]); + } + + return mapped; + } + + /** + * Compute the underlying function value from an unbounded point. + * <p> + * This method simply bounds the unbounded point using the mappings + * set up at construction and calls the underlying function using + * the bounded point. + * </p> + * @param point unbounded value + * @return underlying function value + * @see #unboundedToBounded(double[]) + */ + public double value(double[] point) { + return bounded.value(unboundedToBounded(point)); + } + + /** Mapping interface. */ + private interface Mapper { + /** + * Maps a value from unbounded to bounded. + * + * @param y Unbounded value. + * @return the bounded value. + */ + double unboundedToBounded(double y); + + /** + * Maps a value from bounded to unbounded. + * + * @param x Bounded value. + * @return the unbounded value. + */ + double boundedToUnbounded(double x); + } + + /** Local class for no bounds mapping. */ + private static class NoBoundsMapper implements Mapper { + /** {@inheritDoc} */ + public double unboundedToBounded(final double y) { + return y; + } + + /** {@inheritDoc} */ + public double boundedToUnbounded(final double x) { + return x; + } + } + + /** Local class for lower bounds mapping. */ + private static class LowerBoundMapper implements Mapper { + /** Low bound. */ + private final double lower; + + /** + * Simple constructor. 
+ * + * @param lower lower bound + */ + LowerBoundMapper(final double lower) { + this.lower = lower; + } + + /** {@inheritDoc} */ + public double unboundedToBounded(final double y) { + return lower + FastMath.exp(y); + } + + /** {@inheritDoc} */ + public double boundedToUnbounded(final double x) { + return FastMath.log(x - lower); + } + + } + + /** + * Local class for upper bounds mapping: + * {@code x = upper - exp(-y)} and {@code y = -log(upper - x)}. + */ + private static class UpperBoundMapper implements Mapper { + + /** Upper bound. */ + private final double upper; + + /** Simple constructor. + * @param upper upper bound + */ + UpperBoundMapper(final double upper) { + this.upper = upper; + } + + /** {@inheritDoc} */ + public double unboundedToBounded(final double y) { + return upper - FastMath.exp(-y); + } + + /** {@inheritDoc} */ + public double boundedToUnbounded(final double x) { + return -FastMath.log(upper - x); + } + + } + + /** Local class for lower and upper bounds mapping. */ + private static class LowerUpperBoundMapper implements Mapper { + /** Function from unbounded to bounded. */ + private final UnivariateFunction boundingFunction; + /** Function from bounded to unbounded. */ + private final UnivariateFunction unboundingFunction; + + /** + * Simple constructor.
+ * + * @param lower lower bound + * @param upper upper bound + */ + LowerUpperBoundMapper(final double lower, final double upper) { + boundingFunction = new Sigmoid(lower, upper); + unboundingFunction = new Logit(lower, upper); + } + + /** {@inheritDoc} */ + public double unboundedToBounded(final double y) { + return boundingFunction.value(y); + } + + /** {@inheritDoc} */ + public double boundedToUnbounded(final double x) { + return unboundingFunction.value(x); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionPenaltyAdapter.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionPenaltyAdapter.java new file mode 100644 index 0000000..931f17f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateFunctionPenaltyAdapter.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathUtils; + +/** + * <p>Adapter extending bounded {@link MultivariateFunction} to an unbounded + * domain using a penalty function.</p> + * + * <p> + * This adapter can be used to wrap functions subject to simple bounds on + * parameters so they can be used by optimizers that do <em>not</em> directly + * support simple bounds. + * </p> + * <p> + * The principle is that the user function that will be wrapped will see its + * parameters bounded as required, i.e. when its {@code value} method is called + * with argument array {@code point}, the array elements will fulfill the requirement + * {@code lower[i] <= point[i] <= upper[i]} for all i. Some of the components + * may be unbounded or bounded only on one side if the corresponding bound is + * set to an infinite value. The optimizer will not manage the user function by + * itself, but it will handle this adapter and it is this adapter that will take + * care that the bounds are fulfilled. The adapter {@link #value(double[])} method will + * be called by the optimizer with unbounded parameters, and the adapter will check + * if the parameters are within range or not. If they are in range, then the underlying + * user function will be called, and if they are not the value of a penalty function + * will be returned instead. + * </p> + * <p> + * This adapter is only a poor-man's solution to simple bounds optimization + * constraints that can be used with simple optimizers like + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.SimplexOptimizer + * SimplexOptimizer}.
+ * A better solution is to use an optimizer that directly supports simple bounds like + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.CMAESOptimizer + * CMAESOptimizer} or + * {@link org.apache.commons.math3.optim.nonlinear.scalar.noderiv.BOBYQAOptimizer + * BOBYQAOptimizer}. + * One caveat of this poor-man's solution is that if start point or start simplex + * is completely outside of the allowed range, only the penalty function is used, + * and the optimizer may converge without ever entering the range. + * </p> + * + * @see MultivariateFunctionMappingAdapter + * + * @since 3.0 + */ +public class MultivariateFunctionPenaltyAdapter + implements MultivariateFunction { + /** Underlying bounded function. */ + private final MultivariateFunction bounded; + /** Lower bounds. */ + private final double[] lower; + /** Upper bounds. */ + private final double[] upper; + /** Penalty offset. */ + private final double offset; + /** Penalty scales. */ + private final double[] scale; + + /** + * Simple constructor. + * <p> + * When the optimizer provided points are out of range, the value of the + * penalty function will be used instead of the value of the underlying + * function. In order for this penalty to be effective in rejecting this + * point during the optimization process, the penalty function value should + * be defined with care. This value is computed as: + * <pre> + * penalty(point) = offset + ∑<sub>i</sub>[scale[i] * √|point[i]-boundary[i]|] + * </pre> + * where indices i correspond to all the components that violates their boundaries. + * </p> + * <p> + * So when attempting a function minimization, offset should be larger than + * the maximum expected value of the underlying function and scale components + * should all be positive. When attempting a function maximization, offset + * should be lesser than the minimum expected value of the underlying function + * and scale components should all be negative. 
+ * In short, offset should be larger than the maximum expected value of the underlying + * function for minimization, and lesser than its minimum expected value for maximization. + * </p> + * <p> + * These choices for the penalty function have two properties. First, all out + * of range points will return a function value that is worse than the value + * returned by any in range point. Second, the penalty is worse for large + * boundary violations than for small ones, so the optimizer has a hint + * about the direction in which it should search for acceptable points. + * </p> + * @param bounded bounded function + * @param lower lower bounds for each element of the input parameters array + * (some elements may be set to {@code Double.NEGATIVE_INFINITY} for + * unbounded values) + * @param upper upper bounds for each element of the input parameters array + * (some elements may be set to {@code Double.POSITIVE_INFINITY} for + * unbounded values) + * @param offset base offset of the penalty function + * @param scale scale of the penalty function + * @exception DimensionMismatchException if lower bounds, upper bounds and + * scales do not all have the same dimension + * @exception NumberIsTooSmallException if an upper bound is smaller than the + * corresponding lower bound, or if either of them is NaN + */ + public MultivariateFunctionPenaltyAdapter(final MultivariateFunction bounded, + final double[] lower, final double[] upper, + final double offset, final double[] scale) { + + // safety checks + MathUtils.checkNotNull(lower); + MathUtils.checkNotNull(upper); + MathUtils.checkNotNull(scale); + if (lower.length != upper.length) { + throw new DimensionMismatchException(lower.length, upper.length); + } + if (lower.length != scale.length) { + throw new DimensionMismatchException(lower.length, scale.length); + } + for (int i = 0; i < lower.length; ++i) { + // note the following test is written in such a way it also fails for NaN + if (!(upper[i] >= lower[i])) { + throw new NumberIsTooSmallException(upper[i], lower[i], true); + } + } + + this.bounded = bounded; + // the bound and scale arrays are cloned, so the adapter keeps its own copies + this.lower = lower.clone(); + this.upper = 
upper.clone(); + this.offset = offset; + this.scale = scale.clone(); + } + + /** + * Computes the underlying function value from an unbounded point. + * <p> + * This method simply returns the value of the underlying function + * if the unbounded point already fulfills the bounds, and compute + * a replacement value using the offset and scale if bounds are + * violated, without calling the function at all. + * </p> + * @param point unbounded point + * @return either underlying function value or penalty function value + */ + public double value(double[] point) { + + for (int i = 0; i < scale.length; ++i) { + if ((point[i] < lower[i]) || (point[i] > upper[i])) { + // bound violation starting at this component + double sum = 0; + for (int j = i; j < scale.length; ++j) { + final double overshoot; + if (point[j] < lower[j]) { + overshoot = scale[j] * (lower[j] - point[j]); + } else if (point[j] > upper[j]) { + overshoot = scale[j] * (point[j] - upper[j]); + } else { + overshoot = 0; + } + sum += FastMath.sqrt(overshoot); + } + return offset + sum; + } + } + + // all boundaries are fulfilled, we are in the expected + // domain of the underlying function + return bounded.value(point); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateOptimizer.java new file mode 100644 index 0000000..bc0bec9 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/MultivariateOptimizer.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.optim.BaseMultivariateOptimizer; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.exception.TooManyEvaluationsException; + +/** + * Base class for a multivariate scalar function optimizer. + * + * @since 3.1 + */ +public abstract class MultivariateOptimizer + extends BaseMultivariateOptimizer<PointValuePair> { + /** Objective function. */ + private MultivariateFunction function; + /** Type of optimization. */ + private GoalType goal; + + /** + * @param checker Convergence checker. + */ + protected MultivariateOptimizer(ConvergenceChecker<PointValuePair> checker) { + super(checker); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link BaseMultivariateOptimizer#parseOptimizationData(OptimizationData[]) + * BaseMultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link ObjectiveFunction}</li> + * <li>{@link GoalType}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + */ + @Override + public PointValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException { + // Set up base class and perform computation. 
+ return super.optimize(optData); + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. + * The following data will be looked for: + * <ul> + * <li>{@link ObjectiveFunction}</li> + * <li>{@link GoalType}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof GoalType) { + goal = (GoalType) data; + continue; + } + if (data instanceof ObjectiveFunction) { + function = ((ObjectiveFunction) data).getObjectiveFunction(); + continue; + } + } + } + + /** + * @return the optimization type. + */ + public GoalType getGoalType() { + return goal; + } + + /** + * Computes the objective function value. + * This method <em>must</em> be called by subclasses to enforce the + * evaluation counter limit. + * + * @param params Point at which the objective function must be evaluated. + * @return the objective function value at the specified point. + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + */ + public double computeObjectiveValue(double[] params) { + super.incrementEvaluationCount(); + return function.value(params); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunction.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunction.java new file mode 100644 index 0000000..643cc03 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunction.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Scalar function to be optimized. + * + * @since 3.1 + */ +public class ObjectiveFunction implements OptimizationData { + /** Function to be optimized. */ + private final MultivariateFunction function; + + /** + * @param f Function to be optimized. + */ + public ObjectiveFunction(MultivariateFunction f) { + function = f; + } + + /** + * Gets the function to be optimized. + * + * @return the objective function. + */ + public MultivariateFunction getObjectiveFunction() { + return function; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunctionGradient.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunctionGradient.java new file mode 100644 index 0000000..2fcf2ee --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/ObjectiveFunctionGradient.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar; + +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Gradient of the scalar function to be optimized. + * + * @since 3.1 + */ +public class ObjectiveFunctionGradient implements OptimizationData { + /** Function to be optimized. */ + private final MultivariateVectorFunction gradient; + + /** + * @param g Gradient of the function to be optimized. + */ + public ObjectiveFunctionGradient(MultivariateVectorFunction g) { + gradient = g; + } + + /** + * Gets the gradient of the function to be optimized. + * + * @return the objective function gradient. + */ + public MultivariateVectorFunction getObjectiveFunctionGradient() { + return gradient; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/NonLinearConjugateGradientOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/NonLinearConjugateGradientOptimizer.java new file mode 100644 index 0000000..9074122 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/NonLinearConjugateGradientOptimizer.java @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.nonlinear.scalar.gradient; + +import org.apache.commons.math3.analysis.solvers.UnivariateSolver; +import org.apache.commons.math3.exception.MathInternalError; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.MathUnsupportedOperationException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.nonlinear.scalar.GradientMultivariateOptimizer; +import org.apache.commons.math3.optim.nonlinear.scalar.LineSearch; + + +/** + * Non-linear conjugate gradient optimizer. + * <br/> + * This class supports both the Fletcher-Reeves and the Polak-Ribière + * update formulas for the conjugate search directions. + * It also supports optional preconditioning. + * <br/> + * Constraints are not supported: the call to + * {@link #optimize(OptimizationData[]) optimize} will throw + * {@link MathUnsupportedOperationException} if bounds are passed to it. 
+ * + * @since 2.0 + */ +public class NonLinearConjugateGradientOptimizer + extends GradientMultivariateOptimizer { + /** Update formula for the beta parameter. */ + private final Formula updateFormula; + /** Preconditioner. It is invoked unconditionally by {@code doOptimize()}, so it must not be null. */ + private final Preconditioner preconditioner; + /** Line search algorithm. */ + private final LineSearch line; + + /** + * Available choices of update formulas for updating the parameter + * that is used to compute the successive conjugate search directions. + * For non-linear conjugate gradients, there are + * two formulas: + * <ul> + * <li>Fletcher-Reeves formula</li> + * <li>Polak-Ribière formula</li> + * </ul> + * + * On the one hand, the Fletcher-Reeves formula is guaranteed to converge + * if the start point is close enough to the optimum whereas the + * Polak-Ribière formula may not converge in rare cases. On the + * other hand, the Polak-Ribière formula is often faster when it + * does converge. Polak-Ribière is often used. + * + * @since 2.0 + */ + public enum Formula { + /** Fletcher-Reeves formula. */ + FLETCHER_REEVES, + /** Polak-Ribière formula. */ + POLAK_RIBIERE + } + + /** + * The initial step is a factor with respect to the search direction + * (which itself is roughly related to the gradient of the function). + * <br/> + * It is used to find an interval that brackets the optimum in line + * search. + * + * @since 3.1 + * @deprecated As of v3.3, this class is not used anymore. + * This setting is replaced by the {@code initialBracketingRange} + * argument to the new constructors. + */ + @Deprecated + public static class BracketingStep implements OptimizationData { + /** Initial step. */ + private final double initialStep; + + /** + * @param step Initial step for the bracket search. + */ + public BracketingStep(double step) { + initialStep = step; + } + + /** + * Gets the initial step. + * + * @return the initial step.
+ */ + public double getBracketingStep() { + return initialStep; + } + } + + /** + * Constructor with default tolerances for the line search (1e-8) and + * {@link IdentityPreconditioner preconditioner}. + * + * @param updateFormula formula to use for updating the β parameter, + * must be one of {@link Formula#FLETCHER_REEVES} or + * {@link Formula#POLAK_RIBIERE}. + * @param checker Convergence checker. + */ + public NonLinearConjugateGradientOptimizer(final Formula updateFormula, + ConvergenceChecker<PointValuePair> checker) { + this(updateFormula, + checker, + 1e-8, + 1e-8, + 1e-8, + new IdentityPreconditioner()); + } + + /** + * Constructor with default {@link IdentityPreconditioner preconditioner}. + * + * @param updateFormula formula to use for updating the β parameter, + * must be one of {@link Formula#FLETCHER_REEVES} or + * {@link Formula#POLAK_RIBIERE}. + * @param checker Convergence checker. + * @param lineSearchSolver Solver to use during line search. + * @deprecated as of 3.3. Please use + * {@link #NonLinearConjugateGradientOptimizer(Formula,ConvergenceChecker,double,double,double)} instead. + */ + @Deprecated + public NonLinearConjugateGradientOptimizer(final Formula updateFormula, + ConvergenceChecker<PointValuePair> checker, + final UnivariateSolver lineSearchSolver) { + this(updateFormula, + checker, + lineSearchSolver, + new IdentityPreconditioner()); + } + + /** + * Constructor with default {@link IdentityPreconditioner preconditioner}. + * + * @param updateFormula formula to use for updating the β parameter, + * must be one of {@link Formula#FLETCHER_REEVES} or + * {@link Formula#POLAK_RIBIERE}. + * @param checker Convergence checker. + * @param relativeTolerance Relative threshold for line search. + * @param absoluteTolerance Absolute threshold for line search. + * @param initialBracketingRange Extent of the initial interval used to + * find an interval that brackets the optimum in order to perform the + * line search. 
+ * + * @see LineSearch#LineSearch(MultivariateOptimizer,double,double,double) + * @since 3.3 + */ + public NonLinearConjugateGradientOptimizer(final Formula updateFormula, + ConvergenceChecker<PointValuePair> checker, + double relativeTolerance, + double absoluteTolerance, + double initialBracketingRange) { + this(updateFormula, + checker, + relativeTolerance, + absoluteTolerance, + initialBracketingRange, + new IdentityPreconditioner()); + } + + /** + * @param updateFormula formula to use for updating the β parameter, + * must be one of {@link Formula#FLETCHER_REEVES} or + * {@link Formula#POLAK_RIBIERE}. + * @param checker Convergence checker. + * @param lineSearchSolver Solver to use during line search. + * @param preconditioner Preconditioner. + * @deprecated as of 3.3. Please use + * {@link #NonLinearConjugateGradientOptimizer(Formula,ConvergenceChecker,double,double,double,Preconditioner)} instead. + */ + @Deprecated + public NonLinearConjugateGradientOptimizer(final Formula updateFormula, + ConvergenceChecker<PointValuePair> checker, + final UnivariateSolver lineSearchSolver, + final Preconditioner preconditioner) { + this(updateFormula, + checker, + lineSearchSolver.getRelativeAccuracy(), + lineSearchSolver.getAbsoluteAccuracy(), + lineSearchSolver.getAbsoluteAccuracy(), + preconditioner); + } + + /** + * @param updateFormula formula to use for updating the β parameter, + * must be one of {@link Formula#FLETCHER_REEVES} or + * {@link Formula#POLAK_RIBIERE}. + * @param checker Convergence checker. + * @param preconditioner Preconditioner. + * @param relativeTolerance Relative threshold for line search. + * @param absoluteTolerance Absolute threshold for line search. + * @param initialBracketingRange Extent of the initial interval used to + * find an interval that brackets the optimum in order to perform the + * line search. 
+ * + * @see LineSearch#LineSearch(MultivariateOptimizer,double,double,double) + * @since 3.3 + */ + public NonLinearConjugateGradientOptimizer(final Formula updateFormula, + ConvergenceChecker<PointValuePair> checker, + double relativeTolerance, + double absoluteTolerance, + double initialBracketingRange, + final Preconditioner preconditioner) { + super(checker); + + this.updateFormula = updateFormula; + this.preconditioner = preconditioner; + line = new LineSearch(this, + relativeTolerance, + absoluteTolerance, + initialBracketingRange); + } + + /** + * {@inheritDoc} + * + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + */ + @Override + public PointValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** {@inheritDoc} */ + @Override + protected PointValuePair doOptimize() { + final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker(); + final double[] point = getStartPoint(); + final GoalType goal = getGoalType(); + final int n = point.length; + double[] r = computeObjectiveGradient(point); + // When minimizing, r is turned into the opposite of the gradient. + if (goal == GoalType.MINIMIZE) { + for (int i = 0; i < n; i++) { + r[i] = -r[i]; + } + } + + // Initial search direction. + double[] steepestDescent = preconditioner.precondition(point, r); + double[] searchDirection = steepestDescent.clone(); + + // delta is the dot product of r and the preconditioned direction. + double delta = 0; + for (int i = 0; i < n; ++i) { + delta += r[i] * searchDirection[i]; + } + + PointValuePair current = null; + while (true) { + incrementIterationCount(); + + final double objective = computeObjectiveValue(point); + PointValuePair previous = current; + current = new PointValuePair(point, objective); + if (previous != null && checker.converged(getIterations(), previous, current)) { + // We have found an optimum. + return current; + } + + final double step = line.search(point, searchDirection).getPoint(); + + // Move the current point by "step" along the search direction. 
+ for (int i = 0; i < point.length; ++i) { + point[i] += step * searchDirection[i]; + } + + r = computeObjectiveGradient(point); + if (goal == GoalType.MINIMIZE) { + for (int i = 0; i < n; ++i) { + r[i] = -r[i]; + } + } + + // Compute beta (weight of the previous search direction in the new one). + final double deltaOld = delta; + final double[] newSteepestDescent = preconditioner.precondition(point, r); + delta = 0; + for (int i = 0; i < n; ++i) { + delta += r[i] * newSteepestDescent[i]; + } + + final double beta; + switch (updateFormula) { + case FLETCHER_REEVES: + beta = delta / deltaOld; + break; + case POLAK_RIBIERE: + double deltaMid = 0; + for (int i = 0; i < r.length; ++i) { + deltaMid += r[i] * steepestDescent[i]; + } + beta = (delta - deltaMid) / deltaOld; + break; + default: + // Should never happen. + throw new MathInternalError(); + } + steepestDescent = newSteepestDescent; + + // Compute conjugate search direction. + if (getIterations() % n == 0 || + beta < 0) { + // Break conjugation (every n iterations, or when beta is negative): + // reset search direction. + searchDirection = steepestDescent.clone(); + } else { + // Compute new conjugate search direction. + for (int i = 0; i < n; ++i) { + searchDirection[i] = steepestDescent[i] + beta * searchDirection[i]; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // Bounds are not supported: fail early if any were passed. + checkParameters(); + } + + /** Default identity preconditioner: returns a copy of {@code r}. */ + public static class IdentityPreconditioner implements Preconditioner { + /** {@inheritDoc} */ + public double[] precondition(double[] variables, double[] r) { + return r.clone(); + } + } + + // Class is not used anymore (cf. MATH-1092). However, it might + // be interesting to create a class similar to "LineSearch", but + // that will take advantage that the model's gradient is available. +// /** +// * Internal class for line search. 
+// * <p> +// * The function represented by this class is the dot product of +// * the objective function gradient and the search direction. Its +// * value is zero when the gradient is orthogonal to the search +// * direction, i.e. when the objective function value is a local +// * extremum along the search direction. +// * </p> +// */ +// private class LineSearchFunction implements UnivariateFunction { +// /** Current point. */ +// private final double[] currentPoint; +// /** Search direction. */ +// private final double[] searchDirection; + +// /** +// * @param point Current point. +// * @param direction Search direction. +// */ +// public LineSearchFunction(double[] point, +// double[] direction) { +// currentPoint = point.clone(); +// searchDirection = direction.clone(); +// } + +// /** {@inheritDoc} */ +// public double value(double x) { +// // current point in the search direction +// final double[] shiftedPoint = currentPoint.clone(); +// for (int i = 0; i < shiftedPoint.length; ++i) { +// shiftedPoint[i] += x * searchDirection[i]; +// } + +// // gradient of the objective function +// final double[] gradient = computeObjectiveGradient(shiftedPoint); + +// // dot product with the search direction +// double dotProduct = 0; +// for (int i = 0; i < gradient.length; ++i) { +// dotProduct += gradient[i] * searchDirection[i]; +// } + +// return dotProduct; +// } +// } + + /** + * @throws MathUnsupportedOperationException if bounds were passed to the + * {@link #optimize(OptimizationData[]) optimize} method. 
+ */ + private void checkParameters() { + if (getLowerBound() != null || + getUpperBound() != null) { + throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/Preconditioner.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/Preconditioner.java new file mode 100644 index 0000000..3c0f8fb --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/Preconditioner.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.nonlinear.scalar.gradient; + +/** + * This interface represents a preconditioner for differentiable scalar + * objective function optimizers. + * @since 2.0 + */ +public interface Preconditioner { + /** + * Precondition a search direction. + * <p> + * The returned preconditioned search direction must be computed fast or + * the performance of the algorithm will drop drastically. A classical approach + * is to compute only the diagonal elements of the Hessian and to divide + * the raw search direction by these elements if they are all positive. 
+ * If at least one of them is negative, it is safer to return a clone of + * the raw search direction as if the Hessian were the identity matrix. The + * rationale for this simplified choice is that a negative diagonal element + * means the current point is far from the optimum and preconditioning will + * not be efficient anyway in this case. + * </p> + * @param point current point at which the search direction was computed + * @param r raw search direction ({@link NonLinearConjugateGradientOptimizer} + * passes the opposite of the gradient when minimizing and the gradient + * itself when maximizing) + * @return approximation of H<sup>-1</sup>r where H is the objective function Hessian + */ + double[] precondition(double[] point, double[] r); +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/package-info.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/package-info.java new file mode 100644 index 0000000..9dd9c5a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/gradient/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides optimization algorithms that require derivatives. 
+ */ +package org.apache.commons.math3.optim.nonlinear.scalar.gradient; diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/AbstractSimplex.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/AbstractSimplex.java new file mode 100644 index 0000000..e959787 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/AbstractSimplex.java @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.ZeroException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.MathIllegalArgumentException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * This class implements the simplex concept. + * It is intended to be used in conjunction with {@link SimplexOptimizer}. + * <br/> + * The initial configuration of the simplex is set by the constructors + * {@link #AbstractSimplex(double[])} or {@link #AbstractSimplex(double[][])}. + * The other {@link #AbstractSimplex(int) constructor} will set all steps + * to 1, thus building a default configuration from a unit hypercube. + * <br/> + * Users <em>must</em> call the {@link #build(double[]) build} method in order + * to create the data structure that will be acted on by the other methods of + * this class. + * + * @see SimplexOptimizer + * @since 3.0 + */ +public abstract class AbstractSimplex implements OptimizationData { + /** Simplex vertices (assigned by {@link #build(double[]) build}). */ + private PointValuePair[] simplex; + /** Start simplex configuration: positions of the last {@code dimension} vertices relative to the first one. */ + private double[][] startConfiguration; + /** Simplex dimension (must be equal to {@code simplex.length - 1}). */ + private final int dimension; + + /** + * Build a unit hypercube simplex. + * + * @param n Dimension of the simplex. + */ + protected AbstractSimplex(int n) { + this(n, 1d); + } + + /** + * Build a hypercube simplex with the given side length. 
+ * + * @param n Dimension of the simplex. + * @param sideLength Length of the sides of the hypercube. + */ + protected AbstractSimplex(int n, + double sideLength) { + this(createHypercubeSteps(n, sideLength)); + } + + /** + * The start configuration for simplex is built from a box parallel to + * the canonical axes of the space. The simplex is the subset of vertices + * of a box parallel to the canonical axes. It is built as the path followed + * while traveling from one vertex of the box to the diagonally opposite + * vertex moving only along the box edges. The first vertex of the box will + * be located at the start point of the optimization. + * As an example, in dimension 3 a simplex has 4 vertices. Setting the + * steps to (1, 10, 2) and the start point to (1, 1, 1) would imply the + * start simplex would be: { (1, 1, 1), (2, 1, 1), (2, 11, 1), (2, 11, 3) }. + * The first vertex would be set to the start point at (1, 1, 1) and the + * last vertex would be set to the diagonally opposite vertex at (2, 11, 3). + * + * @param steps Steps along the canonical axes representing box edges. They + * may be negative but not zero. + * @throws NullArgumentException if {@code steps} is {@code null}. + * @throws ZeroException if {@code steps} is empty or if one of the steps is zero. + */ + protected AbstractSimplex(final double[] steps) { + if (steps == null) { + throw new NullArgumentException(); + } + if (steps.length == 0) { + throw new ZeroException(); + } + dimension = steps.length; + + // Only the relative position of the n final vertices with respect + // to the first one are stored. 
+ startConfiguration = new double[dimension][dimension]; + for (int i = 0; i < dimension; i++) { + final double[] vertexI = startConfiguration[i]; + for (int j = 0; j < i + 1; j++) { + if (steps[j] == 0) { + throw new ZeroException(LocalizedFormats.EQUAL_VERTICES_IN_SIMPLEX); + } + } + // Row i holds steps[0..i]; copy once, after the steps have been validated. + System.arraycopy(steps, 0, vertexI, 0, i + 1); + } + } + + /** + * The real initial simplex will be set up by moving the reference + * simplex such that its first point is located at the start point of the + * optimization. + * + * @param referenceSimplex Reference simplex. + * @throws NotStrictlyPositiveException if the reference simplex does not + * contain at least one point. + * @throws DimensionMismatchException if there is a dimension mismatch + * in the reference simplex. + * @throws IllegalArgumentException if one of its vertices is duplicated. + */ + protected AbstractSimplex(final double[][] referenceSimplex) { + if (referenceSimplex.length <= 0) { + throw new NotStrictlyPositiveException(LocalizedFormats.SIMPLEX_NEED_ONE_POINT, + referenceSimplex.length); + } + dimension = referenceSimplex.length - 1; + + // Only the relative position of the n final vertices with respect + // to the first one are stored. + startConfiguration = new double[dimension][dimension]; + final double[] ref0 = referenceSimplex[0]; + + // Loop over vertices. + for (int i = 0; i < referenceSimplex.length; i++) { + final double[] refI = referenceSimplex[i]; + + // Safety checks. + if (refI.length != dimension) { + throw new DimensionMismatchException(refI.length, dimension); + } + for (int j = 0; j < i; j++) { + final double[] refJ = referenceSimplex[j]; + boolean allEquals = true; + for (int k = 0; k < dimension; k++) { + if (refI[k] != refJ[k]) { + allEquals = false; + break; + } + } + if (allEquals) { + throw new MathIllegalArgumentException(LocalizedFormats.EQUAL_VERTICES_IN_SIMPLEX, + i, j); + } + } + + // Store vertex i position relative to vertex 0 position. 
+ if (i > 0) { + final double[] confI = startConfiguration[i - 1]; + for (int k = 0; k < dimension; k++) { + confI[k] = refI[k] - ref0[k]; + } + } + } + } + + /** + * Get simplex dimension. + * + * @return the dimension of the simplex. + */ + public int getDimension() { + return dimension; + } + + /** + * Get simplex size. + * After calling the {@link #build(double[]) build} method, this method + * will be equivalent to {@code getDimension() + 1}. + * + * @return the size of the simplex. + */ + public int getSize() { + return simplex.length; + } + + /** + * Compute the next simplex of the algorithm. + * + * @param evaluationFunction Evaluation function. + * @param comparator Comparator to use to sort simplex vertices from best + * to worst. + * @throws org.apache.commons.math3.exception.TooManyEvaluationsException + * if the algorithm fails to converge. + */ + public abstract void iterate(final MultivariateFunction evaluationFunction, + final Comparator<PointValuePair> comparator); + + /** + * Build an initial simplex. + * + * @param startPoint First point of the simplex. + * @throws DimensionMismatchException if the start point does not match + * simplex dimension. + */ + public void build(final double[] startPoint) { + if (dimension != startPoint.length) { + // Arguments are (actual, expected), as in the other checks of this class. + throw new DimensionMismatchException(startPoint.length, dimension); + } + + // Set first vertex. + simplex = new PointValuePair[dimension + 1]; + simplex[0] = new PointValuePair(startPoint, Double.NaN); + + // Set remaining vertices. + for (int i = 0; i < dimension; i++) { + final double[] confI = startConfiguration[i]; + final double[] vertexI = new double[dimension]; + for (int k = 0; k < dimension; k++) { + vertexI[k] = startPoint[k] + confI[k]; + } + simplex[i + 1] = new PointValuePair(vertexI, Double.NaN); + } + } + + /** + * Evaluate all the non-evaluated points of the simplex. + * + * @param evaluationFunction Evaluation function. 
+ * @param comparator Comparator to use to sort simplex vertices from best to worst. + * @throws org.apache.commons.math3.exception.TooManyEvaluationsException + * if the maximal number of evaluations is exceeded. + */ + public void evaluate(final MultivariateFunction evaluationFunction, + final Comparator<PointValuePair> comparator) { + // Evaluate the objective function at all non-evaluated simplex points. + for (int i = 0; i < simplex.length; i++) { + final PointValuePair vertex = simplex[i]; + final double[] point = vertex.getPointRef(); + if (Double.isNaN(vertex.getValue())) { + simplex[i] = new PointValuePair(point, evaluationFunction.value(point), false); + } + } + + // Sort the simplex from best to worst. + Arrays.sort(simplex, comparator); + } + + /** + * Replace the worst point of the simplex by a new point. + * + * @param pointValuePair Point to insert. + * @param comparator Comparator to use for sorting the simplex vertices + * from best to worst. + */ + protected void replaceWorstPoint(PointValuePair pointValuePair, + final Comparator<PointValuePair> comparator) { + // Single insertion pass: whenever a stored vertex compares worse than the + // carried point they are swapped, and the point carried at the end + // overwrites the last slot. + for (int i = 0; i < dimension; i++) { + if (comparator.compare(simplex[i], pointValuePair) > 0) { + PointValuePair tmp = simplex[i]; + simplex[i] = pointValuePair; + pointValuePair = tmp; + } + } + simplex[dimension] = pointValuePair; + } + + /** + * Get the points of the simplex. + * + * @return all the simplex points. + */ + public PointValuePair[] getPoints() { + final PointValuePair[] copy = new PointValuePair[simplex.length]; + System.arraycopy(simplex, 0, copy, 0, simplex.length); + return copy; + } + + /** + * Get the simplex point stored at the requested {@code index}. + * + * @param index Location. + * @return the point at location {@code index}. + * @throws OutOfRangeException if {@code index} is negative or not less + * than the simplex size. 
+ */ + public PointValuePair getPoint(int index) { + if (index < 0 || + index >= simplex.length) { + throw new OutOfRangeException(index, 0, simplex.length - 1); + } + return simplex[index]; + } + + /** + * Store a new point at location {@code index}. + * Note that no deep-copy of {@code point} is performed. + * + * @param index Location. + * @param point New value. + * @throws OutOfRangeException if {@code index} is negative or not less + * than the simplex size. + */ + protected void setPoint(int index, PointValuePair point) { + if (index < 0 || + index >= simplex.length) { + throw new OutOfRangeException(index, 0, simplex.length - 1); + } + simplex[index] = point; + } + + /** + * Replace all points. + * Note that no deep-copy of {@code points} is performed. + * + * @param points New Points. + * @throws DimensionMismatchException if {@code points} does not have the + * same length as the current simplex. + */ + protected void setPoints(PointValuePair[] points) { + if (points.length != simplex.length) { + throw new DimensionMismatchException(points.length, simplex.length); + } + simplex = points; + } + + /** + * Create steps for a hypercube with the given side length. + * + * @param n Dimension of the hypercube. + * @param sideLength Length of the sides of the hypercube. + * @return the steps. + */ + private static double[] createHypercubeSteps(int n, + double sideLength) { + final double[] steps = new double[n]; + for (int i = 0; i < n; i++) { + steps[i] = sideLength; + } + return steps; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/BOBYQAOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/BOBYQAOptimizer.java new file mode 100644 index 0000000..e5bf39f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/BOBYQAOptimizer.java @@ -0,0 +1,2475 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// CHECKSTYLE: stop all +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.nonlinear.scalar.MultivariateOptimizer; +import org.apache.commons.math3.util.FastMath; + +/** + * Powell's BOBYQA algorithm. This implementation is translated and + * adapted from the Fortran version available + * <a href="http://plato.asu.edu/ftp/other_software/bobyqa.zip">here</a>. + * See <a href="http://www.optimization-online.org/DB_HTML/2010/05/2616.html"> + * this paper</a> for an introduction. + * <br/> + * BOBYQA is particularly well suited for high dimensional problems + * where derivatives are not available. In most cases it outperforms the + * {@link PowellOptimizer} significantly. 
Stochastic algorithms like + * {@link CMAESOptimizer} succeed more often than BOBYQA, but are more + * expensive. BOBYQA could also be considered as a replacement of any + * derivative-based optimizer when the derivatives are approximated by + * finite differences. + * + * @since 3.0 + */ +public class BOBYQAOptimizer + extends MultivariateOptimizer { + /** Minimum dimension of the problem: {@value} */ + public static final int MINIMUM_PROBLEM_DIMENSION = 2; + /** Default value for {@link #initialTrustRegionRadius}: {@value} . */ + public static final double DEFAULT_INITIAL_RADIUS = 10.0; + /** Default value for {@link #stoppingTrustRegionRadius}: {@value} . */ + public static final double DEFAULT_STOPPING_RADIUS = 1E-8; + /** Constant 0. */ + private static final double ZERO = 0d; + /** Constant 1. */ + private static final double ONE = 1d; + /** Constant 2. */ + private static final double TWO = 2d; + /** Constant 10. */ + private static final double TEN = 10d; + /** Constant 16. */ + private static final double SIXTEEN = 16d; + /** Constant 250. */ + private static final double TWO_HUNDRED_FIFTY = 250d; + /** Constant -1. */ + private static final double MINUS_ONE = -ONE; + /** Constant 1/2. */ + private static final double HALF = ONE / 2; + /** Constant 1/4. */ + private static final double ONE_OVER_FOUR = ONE / 4; + /** Constant 1/8. */ + private static final double ONE_OVER_EIGHT = ONE / 8; + /** Constant 1/10. */ + private static final double ONE_OVER_TEN = ONE / 10; + /** Constant 1/1000. */ + private static final double ONE_OVER_A_THOUSAND = ONE / 1000; + + /** + * Number of interpolation conditions, as given to the constructor. + */ + private final int numberOfInterpolationPoints; + /** + * Initial trust region radius, as given to the constructor. + */ + private double initialTrustRegionRadius; + /** + * Stopping trust region radius, as given to the constructor. + */ + private final double stoppingTrustRegionRadius; + /** {@code true} if the goal is to minimize, {@code false} if it is to maximize. 
*/ + private boolean isMinimize; + /** + * Current best values for the variables to be optimized. + * The vector will be changed in-place to contain the values of the least + * calculated objective function values. + */ + private ArrayRealVector currentBest; + /** Differences between the upper and lower bounds. */ + private double[] boundDifference; + /** + * Index of the interpolation point at the trust region center. + */ + private int trustRegionCenterInterpolationPointIndex; + /** + * Last <em>n</em> columns of matrix H (where <em>n</em> is the dimension + * of the problem). + * XXX "bmat" in the original code. + */ + private Array2DRowRealMatrix bMatrix; + /** + * Factorization of the leading <em>npt</em> square submatrix of H, this + * factorization being Z Z<sup>T</sup>, which provides both the correct + * rank and positive semi-definiteness. + * XXX "zmat" in the original code. + */ + private Array2DRowRealMatrix zMatrix; + /** + * Coordinates of the interpolation points relative to {@link #originShift}. + * XXX "xpt" in the original code. + */ + private Array2DRowRealMatrix interpolationPoints; + /** + * Shift of origin that should reduce the contributions from rounding + * errors to values of the model and Lagrange functions. + * XXX "xbase" in the original code. + */ + private ArrayRealVector originShift; + /** + * Values of the objective function at the interpolation points. + * XXX "fval" in the original code. + */ + private ArrayRealVector fAtInterpolationPoints; + /** + * Displacement from {@link #originShift} of the trust region center. + * XXX "xopt" in the original code. + */ + private ArrayRealVector trustRegionCenterOffset; + /** + * Gradient of the quadratic model at {@link #originShift} + + * {@link #trustRegionCenterOffset}. + * XXX "gopt" in the original code. + */ + private ArrayRealVector gradientAtTrustRegionCenter; + /** + * Differences {@link #getLowerBound()} - {@link #originShift}. 
+ * All the components of every {@link #trustRegionCenterOffset} are going + * to satisfy the bounds<br/> + * {@link #getLowerBound() lowerBound}<sub>i</sub> ≤ + * {@link #trustRegionCenterOffset}<sub>i</sub>,<br/> + * with appropriate equalities when {@link #trustRegionCenterOffset} is + * on a constraint boundary. + * XXX "sl" in the original code. + */ + private ArrayRealVector lowerDifference; + /** + * Differences {@link #getUpperBound()} - {@link #originShift} + * All the components of every {@link #trustRegionCenterOffset} are going + * to satisfy the bounds<br/> + * {@link #trustRegionCenterOffset}<sub>i</sub> ≤ + * {@link #getUpperBound() upperBound}<sub>i</sub>,<br/> + * with appropriate equalities when {@link #trustRegionCenterOffset} is + * on a constraint boundary. + * XXX "su" in the original code. + */ + private ArrayRealVector upperDifference; + /** + * Parameters of the implicit second derivatives of the quadratic model. + * XXX "pq" in the original code. + */ + private ArrayRealVector modelSecondDerivativesParameters; + /** + * Point chosen by function {@link #trsbox(double,ArrayRealVector, + * ArrayRealVector, ArrayRealVector,ArrayRealVector,ArrayRealVector) trsbox} + * or {@link #altmov(int,double) altmov}. + * Usually {@link #originShift} + {@link #newPoint} is the vector of + * variables for the next evaluation of the objective function. + * It also satisfies the constraints indicated in {@link #lowerDifference} + * and {@link #upperDifference}. + * XXX "xnew" in the original code. + */ + private ArrayRealVector newPoint; + /** + * Alternative to {@link #newPoint}, chosen by + * {@link #altmov(int,double) altmov}. + * It may replace {@link #newPoint} in order to increase the denominator + * in the {@link #update(double, double, int) updating procedure}. + * XXX "xalt" in the original code. 
+ */ + private ArrayRealVector alternativeNewPoint; + /** + * Trial step from {@link #trustRegionCenterOffset} which is usually + * {@link #newPoint} - {@link #trustRegionCenterOffset}. + * XXX "d__" in the original code. + */ + private ArrayRealVector trialStepPoint; + /** + * Values of the Lagrange functions at a new point. + * XXX "vlag" in the original code. + */ + private ArrayRealVector lagrangeValuesAtNewPoint; + /** + * Explicit second derivatives of the quadratic model. + * XXX "hq" in the original code. + */ + private ArrayRealVector modelSecondDerivativesValues; + + /** + * Build an optimizer that uses {@link #DEFAULT_INITIAL_RADIUS} and + * {@link #DEFAULT_STOPPING_RADIUS} as trust region radii. + * + * @param numberOfInterpolationPoints Number of interpolation conditions. + * For a problem of dimension {@code n}, its value must be in the interval + * {@code [n+2, (n+1)(n+2)/2]}. + * Choices that exceed {@code 2n+1} are not recommended. + */ + public BOBYQAOptimizer(int numberOfInterpolationPoints) { + this(numberOfInterpolationPoints, + DEFAULT_INITIAL_RADIUS, + DEFAULT_STOPPING_RADIUS); + } + + /** + * Build an optimizer with explicit trust region radii. + * + * @param numberOfInterpolationPoints Number of interpolation conditions. + * For a problem of dimension {@code n}, its value must be in the interval + * {@code [n+2, (n+1)(n+2)/2]}. + * Choices that exceed {@code 2n+1} are not recommended. + * @param initialTrustRegionRadius Initial trust region radius. + * @param stoppingTrustRegionRadius Stopping trust region radius. + */ + public BOBYQAOptimizer(int numberOfInterpolationPoints, + double initialTrustRegionRadius, + double stoppingTrustRegionRadius) { + super(null); // No custom convergence criterion. + this.numberOfInterpolationPoints = numberOfInterpolationPoints; + this.initialTrustRegionRadius = initialTrustRegionRadius; + this.stoppingTrustRegionRadius = stoppingTrustRegionRadius; + } + + /** {@inheritDoc} */ + @Override + protected PointValuePair doOptimize() { + final double[] lowerBound = getLowerBound(); + final double[] upperBound = getUpperBound(); + + // Validity checks. 
+ setup(lowerBound, upperBound); + + isMinimize = (getGoalType() == GoalType.MINIMIZE); + currentBest = new ArrayRealVector(getStartPoint()); + + final double value = bobyqa(lowerBound, upperBound); + + return new PointValuePair(currentBest.getDataRef(), + isMinimize ? value : -value); + } + + /** + * This subroutine seeks the least value of a function of many variables, + * by applying a trust region method that forms quadratic models by + * interpolation. There is usually some freedom in the interpolation + * conditions, which is taken up by minimizing the Frobenius norm of + * the change to the second derivative of the model, beginning with the + * zero matrix. The values of the variables are constrained by upper and + * lower bounds. The arguments of the subroutine are as follows. + * + * N must be set to the number of variables and must be at least two. + * NPT is the number of interpolation conditions. Its value must be in + * the interval [N+2,(N+1)(N+2)/2]. Choices that exceed 2*N+1 are not + * recommended. + * Initial values of the variables must be set in X(1),X(2),...,X(N). They + * will be changed to the values that give the least calculated F. + * For I=1,2,...,N, XL(I) and XU(I) must provide the lower and upper + * bounds, respectively, on X(I). The construction of quadratic models + * requires XL(I) to be strictly less than XU(I) for each I. Further, + * the contribution to a model from changes to the I-th variable is + * damaged severely by rounding errors if XU(I)-XL(I) is too small. + * RHOBEG and RHOEND must be set to the initial and final values of a trust + * region radius, so both must be positive with RHOEND no greater than + * RHOBEG. Typically, RHOBEG should be about one tenth of the greatest + * expected change to a variable, while RHOEND should indicate the + * accuracy that is required in the final values of the variables. An + * error return occurs if any of the differences XU(I)-XL(I), I=1,...,N, + * is less than 2*RHOBEG. 
+ * MAXFUN must be set to an upper bound on the number of calls of CALFUN. + * The array W will be used for working space. Its length must be at least + * (NPT+5)*(NPT+N)+3*N*(N+5)/2. + * + * NOTE: the argument names above appear to be those of the original Fortran + * subroutine; MAXFUN and W do not appear in this method's signature. + * + * @param lowerBound Lower bounds. + * @param upperBound Upper bounds. + * @return the value of the objective at the optimum. + */ + private double bobyqa(double[] lowerBound, + double[] upperBound) { + printMethod(); // XXX + + final int n = currentBest.getDimension(); + + // Return if there is insufficient space between the bounds. Modify the + // initial X if necessary in order to avoid conflicts between the bounds + // and the construction of the first quadratic model. The lower and upper + // bounds on moves from the updated X are set now, in the ISL and ISU + // partitions of W, in order to provide useful and exact information about + // components of X that become within distance RHOBEG from their bounds. + + for (int j = 0; j < n; j++) { + final double boundDiff = boundDifference[j]; + lowerDifference.setEntry(j, lowerBound[j] - currentBest.getEntry(j)); + upperDifference.setEntry(j, upperBound[j] - currentBest.getEntry(j)); + // Component j starts within initialTrustRegionRadius of its lower bound (or below it). + if (lowerDifference.getEntry(j) >= -initialTrustRegionRadius) { + if (lowerDifference.getEntry(j) >= ZERO) { + // At or below the lower bound: start on the bound itself. + currentBest.setEntry(j, lowerBound[j]); + lowerDifference.setEntry(j, ZERO); + upperDifference.setEntry(j, boundDiff); + } else { + // Otherwise start exactly initialTrustRegionRadius above the lower bound. + currentBest.setEntry(j, lowerBound[j] + initialTrustRegionRadius); + lowerDifference.setEntry(j, -initialTrustRegionRadius); + // Computing MAX + final double deltaOne = upperBound[j] - currentBest.getEntry(j); + upperDifference.setEntry(j, FastMath.max(deltaOne, initialTrustRegionRadius)); + } + } else if (upperDifference.getEntry(j) <= initialTrustRegionRadius) { + // Mirror case: component j starts within initialTrustRegionRadius of its upper bound (or above it). + if (upperDifference.getEntry(j) <= ZERO) { + currentBest.setEntry(j, upperBound[j]); + lowerDifference.setEntry(j, -boundDiff); + upperDifference.setEntry(j, ZERO); + } else { + currentBest.setEntry(j, upperBound[j] - initialTrustRegionRadius); + 
// Computing MIN + final double deltaOne = lowerBound[j] - currentBest.getEntry(j); + final double deltaTwo = -initialTrustRegionRadius; + lowerDifference.setEntry(j, FastMath.min(deltaOne, deltaTwo)); + upperDifference.setEntry(j, initialTrustRegionRadius); + } + } + } + + // Make the call of BOBYQB. + + return bobyqb(lowerBound, upperBound); + } // bobyqa + + // ---------------------------------------------------------------------------------------- + + /** + * The arguments N, NPT, X, XL, XU, RHOBEG, RHOEND, IPRINT and MAXFUN + * are identical to the corresponding arguments in SUBROUTINE BOBYQA. + * XBASE holds a shift of origin that should reduce the contributions + * from rounding errors to values of the model and Lagrange functions. + * XPT is a two-dimensional array that holds the coordinates of the + * interpolation points relative to XBASE. + * FVAL holds the values of F at the interpolation points. + * XOPT is set to the displacement from XBASE of the trust region centre. + * GOPT holds the gradient of the quadratic model at XBASE+XOPT. + * HQ holds the explicit second derivatives of the quadratic model. + * PQ contains the parameters of the implicit second derivatives of the + * quadratic model. + * BMAT holds the last N columns of H. + * ZMAT holds the factorization of the leading NPT by NPT submatrix of H, + * this factorization being ZMAT times ZMAT^T, which provides both the + * correct rank and positive semi-definiteness. + * NDIM is the first dimension of BMAT and has the value NPT+N. + * SL and SU hold the differences XL-XBASE and XU-XBASE, respectively. + * All the components of every XOPT are going to satisfy the bounds + * SL(I) .LEQ. XOPT(I) .LEQ. SU(I), with appropriate equalities when + * XOPT is on a constraint boundary. + * XNEW is chosen by SUBROUTINE TRSBOX or ALTMOV. Usually XBASE+XNEW is the + * vector of variables for the next call of CALFUN. XNEW also satisfies + * the SL and SU constraints in the way that has just been mentioned. 
+ * XALT is an alternative to XNEW, chosen by ALTMOV, that may replace XNEW + * in order to increase the denominator in the updating of UPDATE. + * D is reserved for a trial step from XOPT, which is usually XNEW-XOPT. + * VLAG contains the values of the Lagrange functions at a new point X. + * They are part of a product that requires VLAG to be of length NDIM. + * W is a one-dimensional array that is used for working space. Its length + * must be at least 3*NDIM = 3*(NPT+N). + * + * @param lowerBound Lower bounds. + * @param upperBound Upper bounds. + * @return the value of the objective at the optimum. + */ + private double bobyqb(double[] lowerBound, + double[] upperBound) { + printMethod(); // XXX + + final int n = currentBest.getDimension(); + final int npt = numberOfInterpolationPoints; + final int np = n + 1; + final int nptm = npt - np; + final int nh = n * np / 2; + + final ArrayRealVector work1 = new ArrayRealVector(n); + final ArrayRealVector work2 = new ArrayRealVector(npt); + final ArrayRealVector work3 = new ArrayRealVector(npt); + + double cauchy = Double.NaN; + double alpha = Double.NaN; + double dsq = Double.NaN; + double crvmin = Double.NaN; + + // Set some constants. + // Parameter adjustments + + // Function Body + + // The call of PRELIM sets the elements of XBASE, XPT, FVAL, GOPT, HQ, PQ, + // BMAT and ZMAT for the first iteration, with the corresponding values of + // of NF and KOPT, which are the number of calls of CALFUN so far and the + // index of the interpolation point at the trust region centre. Then the + // initial XOPT is set too. The branch to label 720 occurs if MAXFUN is + // less than NPT. GOPT will be updated if KOPT is different from KBASE. 
+ + trustRegionCenterInterpolationPointIndex = 0; + + prelim(lowerBound, upperBound); + double xoptsq = ZERO; + for (int i = 0; i < n; i++) { + trustRegionCenterOffset.setEntry(i, interpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex, i)); + // Computing 2nd power + final double deltaOne = trustRegionCenterOffset.getEntry(i); + xoptsq += deltaOne * deltaOne; + } + double fsave = fAtInterpolationPoints.getEntry(0); + final int kbase = 0; + + // Complete the settings that are required for the iterative procedure. + + int ntrits = 0; + int itest = 0; + int knew = 0; + int nfsav = getEvaluations(); + double rho = initialTrustRegionRadius; + double delta = rho; + double diffa = ZERO; + double diffb = ZERO; + double diffc = ZERO; + double f = ZERO; + double beta = ZERO; + double adelt = ZERO; + double denom = ZERO; + double ratio = ZERO; + double dnorm = ZERO; + double scaden = ZERO; + double biglsq = ZERO; + double distsq = ZERO; + + // Update GOPT if necessary before the first iteration and after each + // call of RESCUE that makes a call of CALFUN. 
+ + int state = 20; + for(;;) { + switch (state) { + case 20: { + printState(20); // XXX + if (trustRegionCenterInterpolationPointIndex != kbase) { + int ih = 0; + for (int j = 0; j < n; j++) { + for (int i = 0; i <= j; i++) { + if (i < j) { + gradientAtTrustRegionCenter.setEntry(j, gradientAtTrustRegionCenter.getEntry(j) + modelSecondDerivativesValues.getEntry(ih) * trustRegionCenterOffset.getEntry(i)); + } + gradientAtTrustRegionCenter.setEntry(i, gradientAtTrustRegionCenter.getEntry(i) + modelSecondDerivativesValues.getEntry(ih) * trustRegionCenterOffset.getEntry(j)); + ih++; + } + } + if (getEvaluations() > npt) { + for (int k = 0; k < npt; k++) { + double temp = ZERO; + for (int j = 0; j < n; j++) { + temp += interpolationPoints.getEntry(k, j) * trustRegionCenterOffset.getEntry(j); + } + temp *= modelSecondDerivativesParameters.getEntry(k); + for (int i = 0; i < n; i++) { + gradientAtTrustRegionCenter.setEntry(i, gradientAtTrustRegionCenter.getEntry(i) + temp * interpolationPoints.getEntry(k, i)); + } + } + // throw new PathIsExploredException(); // XXX + } + } + + // Generate the next point in the trust region that provides a small value + // of the quadratic model subject to the constraints on the variables. + // The int NTRITS is set to the number "trust region" iterations that + // have occurred since the last "alternative" iteration. If the length + // of XNEW-XOPT is less than HALF*RHO, however, then there is a branch to + // label 650 or 680 with NTRITS=-1, instead of calculating F at XNEW. 
+ + } + case 60: { + printState(60); // XXX + final ArrayRealVector gnew = new ArrayRealVector(n); + final ArrayRealVector xbdi = new ArrayRealVector(n); + final ArrayRealVector s = new ArrayRealVector(n); + final ArrayRealVector hs = new ArrayRealVector(n); + final ArrayRealVector hred = new ArrayRealVector(n); + + final double[] dsqCrvmin = trsbox(delta, gnew, xbdi, s, + hs, hred); + dsq = dsqCrvmin[0]; + crvmin = dsqCrvmin[1]; + + // Computing MIN + double deltaOne = delta; + double deltaTwo = FastMath.sqrt(dsq); + dnorm = FastMath.min(deltaOne, deltaTwo); + if (dnorm < HALF * rho) { + ntrits = -1; + // Computing 2nd power + deltaOne = TEN * rho; + distsq = deltaOne * deltaOne; + if (getEvaluations() <= nfsav + 2) { + state = 650; break; + } + + // The following choice between labels 650 and 680 depends on whether or + // not our work with the current RHO seems to be complete. Either RHO is + // decreased or termination occurs if the errors in the quadratic model at + // the last three interpolation points compare favourably with predictions + // of likely improvements to the model within distance HALF*RHO of XOPT. 
+ + // Computing MAX + deltaOne = FastMath.max(diffa, diffb); + final double errbig = FastMath.max(deltaOne, diffc); + final double frhosq = rho * ONE_OVER_EIGHT * rho; + if (crvmin > ZERO && + errbig > frhosq * crvmin) { + state = 650; break; + } + final double bdtol = errbig / rho; + for (int j = 0; j < n; j++) { + double bdtest = bdtol; + if (newPoint.getEntry(j) == lowerDifference.getEntry(j)) { + bdtest = work1.getEntry(j); + } + if (newPoint.getEntry(j) == upperDifference.getEntry(j)) { + bdtest = -work1.getEntry(j); + } + if (bdtest < bdtol) { + double curv = modelSecondDerivativesValues.getEntry((j + j * j) / 2); + for (int k = 0; k < npt; k++) { + // Computing 2nd power + final double d1 = interpolationPoints.getEntry(k, j); + curv += modelSecondDerivativesParameters.getEntry(k) * (d1 * d1); + } + bdtest += HALF * curv * rho; + if (bdtest < bdtol) { + state = 650; break; + } + // throw new PathIsExploredException(); // XXX + } + } + state = 680; break; + } + ++ntrits; + + // Severe cancellation is likely to occur if XOPT is too far from XBASE. + // If the following test holds, then XBASE is shifted so that XOPT becomes + // zero. The appropriate changes are made to BMAT and to the second + // derivatives of the current model, beginning with the changes to BMAT + // that do not depend on ZMAT. VLAG is used temporarily for working space. + + } + case 90: { + printState(90); // XXX + if (dsq <= xoptsq * ONE_OVER_A_THOUSAND) { + final double fracsq = xoptsq * ONE_OVER_FOUR; + double sumpq = ZERO; + // final RealVector sumVector + // = new ArrayRealVector(npt, -HALF * xoptsq).add(interpolationPoints.operate(trustRegionCenter)); + for (int k = 0; k < npt; k++) { + sumpq += modelSecondDerivativesParameters.getEntry(k); + double sum = -HALF * xoptsq; + for (int i = 0; i < n; i++) { + sum += interpolationPoints.getEntry(k, i) * trustRegionCenterOffset.getEntry(i); + } + // sum = sumVector.getEntry(k); // XXX "testAckley" and "testDiffPow" fail. 
+ work2.setEntry(k, sum); + final double temp = fracsq - HALF * sum; + for (int i = 0; i < n; i++) { + work1.setEntry(i, bMatrix.getEntry(k, i)); + lagrangeValuesAtNewPoint.setEntry(i, sum * interpolationPoints.getEntry(k, i) + temp * trustRegionCenterOffset.getEntry(i)); + final int ip = npt + i; + for (int j = 0; j <= i; j++) { + bMatrix.setEntry(ip, j, + bMatrix.getEntry(ip, j) + + work1.getEntry(i) * lagrangeValuesAtNewPoint.getEntry(j) + + lagrangeValuesAtNewPoint.getEntry(i) * work1.getEntry(j)); + } + } + } + + // Then the revisions of BMAT that depend on ZMAT are calculated. + + for (int m = 0; m < nptm; m++) { + double sumz = ZERO; + double sumw = ZERO; + for (int k = 0; k < npt; k++) { + sumz += zMatrix.getEntry(k, m); + lagrangeValuesAtNewPoint.setEntry(k, work2.getEntry(k) * zMatrix.getEntry(k, m)); + sumw += lagrangeValuesAtNewPoint.getEntry(k); + } + for (int j = 0; j < n; j++) { + double sum = (fracsq * sumz - HALF * sumw) * trustRegionCenterOffset.getEntry(j); + for (int k = 0; k < npt; k++) { + sum += lagrangeValuesAtNewPoint.getEntry(k) * interpolationPoints.getEntry(k, j); + } + work1.setEntry(j, sum); + for (int k = 0; k < npt; k++) { + bMatrix.setEntry(k, j, + bMatrix.getEntry(k, j) + + sum * zMatrix.getEntry(k, m)); + } + } + for (int i = 0; i < n; i++) { + final int ip = i + npt; + final double temp = work1.getEntry(i); + for (int j = 0; j <= i; j++) { + bMatrix.setEntry(ip, j, + bMatrix.getEntry(ip, j) + + temp * work1.getEntry(j)); + } + } + } + + // The following instructions complete the shift, including the changes + // to the second derivative parameters of the quadratic model. 
+ + int ih = 0; + for (int j = 0; j < n; j++) { + work1.setEntry(j, -HALF * sumpq * trustRegionCenterOffset.getEntry(j)); + for (int k = 0; k < npt; k++) { + work1.setEntry(j, work1.getEntry(j) + modelSecondDerivativesParameters.getEntry(k) * interpolationPoints.getEntry(k, j)); + interpolationPoints.setEntry(k, j, interpolationPoints.getEntry(k, j) - trustRegionCenterOffset.getEntry(j)); + } + for (int i = 0; i <= j; i++) { + modelSecondDerivativesValues.setEntry(ih, + modelSecondDerivativesValues.getEntry(ih) + + work1.getEntry(i) * trustRegionCenterOffset.getEntry(j) + + trustRegionCenterOffset.getEntry(i) * work1.getEntry(j)); + bMatrix.setEntry(npt + i, j, bMatrix.getEntry(npt + j, i)); + ih++; + } + } + for (int i = 0; i < n; i++) { + originShift.setEntry(i, originShift.getEntry(i) + trustRegionCenterOffset.getEntry(i)); + newPoint.setEntry(i, newPoint.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + lowerDifference.setEntry(i, lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + upperDifference.setEntry(i, upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + trustRegionCenterOffset.setEntry(i, ZERO); + } + xoptsq = ZERO; + } + if (ntrits == 0) { + state = 210; break; + } + state = 230; break; + + // XBASE is also moved to XOPT by a call of RESCUE. This calculation is + // more expensive than the previous shift, because new matrices BMAT and + // ZMAT are generated from scratch, which may include the replacement of + // interpolation points whose positions seem to be causing near linear + // dependence in the interpolation conditions. Therefore RESCUE is called + // only if rounding errors have reduced by at least a factor of two the + // denominator of the formula for updating the H matrix. It provides a + // useful safeguard, but is not invoked in most applications of BOBYQA. 
+ + } + case 210: { + printState(210); // XXX + // Pick two alternative vectors of variables, relative to XBASE, that + // are suitable as new positions of the KNEW-th interpolation point. + // Firstly, XNEW is set to the point on a line through XOPT and another + // interpolation point that minimizes the predicted value of the next + // denominator, subject to ||XNEW - XOPT|| .LEQ. ADELT and to the SL + // and SU bounds. Secondly, XALT is set to the best feasible point on + // a constrained version of the Cauchy step of the KNEW-th Lagrange + // function, the corresponding value of the square of this function + // being returned in CAUCHY. The choice between these alternatives is + // going to be made when the denominator is calculated. + + final double[] alphaCauchy = altmov(knew, adelt); + alpha = alphaCauchy[0]; + cauchy = alphaCauchy[1]; + + for (int i = 0; i < n; i++) { + trialStepPoint.setEntry(i, newPoint.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + } + + // Calculate VLAG and BETA for the current choice of D. The scalar + // product of D with XPT(K,.) is going to be held in W(NPT+K) for + // use when VQUAD is calculated. 
+ + } + case 230: { + printState(230); // XXX + for (int k = 0; k < npt; k++) { + double suma = ZERO; + double sumb = ZERO; + double sum = ZERO; + for (int j = 0; j < n; j++) { + suma += interpolationPoints.getEntry(k, j) * trialStepPoint.getEntry(j); + sumb += interpolationPoints.getEntry(k, j) * trustRegionCenterOffset.getEntry(j); + sum += bMatrix.getEntry(k, j) * trialStepPoint.getEntry(j); + } + work3.setEntry(k, suma * (HALF * suma + sumb)); + lagrangeValuesAtNewPoint.setEntry(k, sum); + work2.setEntry(k, suma); + } + beta = ZERO; + for (int m = 0; m < nptm; m++) { + double sum = ZERO; + for (int k = 0; k < npt; k++) { + sum += zMatrix.getEntry(k, m) * work3.getEntry(k); + } + beta -= sum * sum; + for (int k = 0; k < npt; k++) { + lagrangeValuesAtNewPoint.setEntry(k, lagrangeValuesAtNewPoint.getEntry(k) + sum * zMatrix.getEntry(k, m)); + } + } + dsq = ZERO; + double bsum = ZERO; + double dx = ZERO; + for (int j = 0; j < n; j++) { + // Computing 2nd power + final double d1 = trialStepPoint.getEntry(j); + dsq += d1 * d1; + double sum = ZERO; + for (int k = 0; k < npt; k++) { + sum += work3.getEntry(k) * bMatrix.getEntry(k, j); + } + bsum += sum * trialStepPoint.getEntry(j); + final int jp = npt + j; + for (int i = 0; i < n; i++) { + sum += bMatrix.getEntry(jp, i) * trialStepPoint.getEntry(i); + } + lagrangeValuesAtNewPoint.setEntry(jp, sum); + bsum += sum * trialStepPoint.getEntry(j); + dx += trialStepPoint.getEntry(j) * trustRegionCenterOffset.getEntry(j); + } + + beta = dx * dx + dsq * (xoptsq + dx + dx + HALF * dsq) + beta - bsum; // Original + // beta += dx * dx + dsq * (xoptsq + dx + dx + HALF * dsq) - bsum; // XXX "testAckley" and "testDiffPow" fail. + // beta = dx * dx + dsq * (xoptsq + 2 * dx + HALF * dsq) + beta - bsum; // XXX "testDiffPow" fails. 
+ + lagrangeValuesAtNewPoint.setEntry(trustRegionCenterInterpolationPointIndex, + lagrangeValuesAtNewPoint.getEntry(trustRegionCenterInterpolationPointIndex) + ONE); + + // If NTRITS is zero, the denominator may be increased by replacing + // the step D of ALTMOV by a Cauchy step. Then RESCUE may be called if + // rounding errors have damaged the chosen denominator. + + if (ntrits == 0) { + // Computing 2nd power + final double d1 = lagrangeValuesAtNewPoint.getEntry(knew); + denom = d1 * d1 + alpha * beta; + if (denom < cauchy && cauchy > ZERO) { + for (int i = 0; i < n; i++) { + newPoint.setEntry(i, alternativeNewPoint.getEntry(i)); + trialStepPoint.setEntry(i, newPoint.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + } + cauchy = ZERO; // XXX Useful statement? + state = 230; break; + } + // Alternatively, if NTRITS is positive, then set KNEW to the index of + // the next interpolation point to be deleted to make room for a trust + // region step. Again RESCUE may be called if rounding errors have damaged_ + // the chosen denominator, which is the reason for attempting to select + // KNEW before calculating the next value of the objective function. 
+ + } else { + final double delsq = delta * delta; + scaden = ZERO; + biglsq = ZERO; + knew = 0; + for (int k = 0; k < npt; k++) { + if (k == trustRegionCenterInterpolationPointIndex) { + continue; + } + double hdiag = ZERO; + for (int m = 0; m < nptm; m++) { + // Computing 2nd power + final double d1 = zMatrix.getEntry(k, m); + hdiag += d1 * d1; + } + // Computing 2nd power + final double d2 = lagrangeValuesAtNewPoint.getEntry(k); + final double den = beta * hdiag + d2 * d2; + distsq = ZERO; + for (int j = 0; j < n; j++) { + // Computing 2nd power + final double d3 = interpolationPoints.getEntry(k, j) - trustRegionCenterOffset.getEntry(j); + distsq += d3 * d3; + } + // Computing MAX + // Computing 2nd power + final double d4 = distsq / delsq; + final double temp = FastMath.max(ONE, d4 * d4); + if (temp * den > scaden) { + scaden = temp * den; + knew = k; + denom = den; + } + // Computing MAX + // Computing 2nd power + final double d5 = lagrangeValuesAtNewPoint.getEntry(k); + biglsq = FastMath.max(biglsq, temp * (d5 * d5)); + } + } + + // Put the variables for the next calculation of the objective function + // in XNEW, with any adjustments for the bounds. + + // Calculate the value of the objective function at XBASE+XNEW, unless + // the limit on the number of calculations of F has been reached. 
+ + } + case 360: { + printState(360); // XXX + for (int i = 0; i < n; i++) { + // Computing MIN + // Computing MAX + final double d3 = lowerBound[i]; + final double d4 = originShift.getEntry(i) + newPoint.getEntry(i); + final double d1 = FastMath.max(d3, d4); + final double d2 = upperBound[i]; + currentBest.setEntry(i, FastMath.min(d1, d2)); + if (newPoint.getEntry(i) == lowerDifference.getEntry(i)) { + currentBest.setEntry(i, lowerBound[i]); + } + if (newPoint.getEntry(i) == upperDifference.getEntry(i)) { + currentBest.setEntry(i, upperBound[i]); + } + } + + f = computeObjectiveValue(currentBest.toArray()); + + if (!isMinimize) { + f = -f; + } + if (ntrits == -1) { + fsave = f; + state = 720; break; + } + + // Use the quadratic model to predict the change in F due to the step D, + // and set DIFF to the error of this prediction. + + final double fopt = fAtInterpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex); + double vquad = ZERO; + int ih = 0; + for (int j = 0; j < n; j++) { + vquad += trialStepPoint.getEntry(j) * gradientAtTrustRegionCenter.getEntry(j); + for (int i = 0; i <= j; i++) { + double temp = trialStepPoint.getEntry(i) * trialStepPoint.getEntry(j); + if (i == j) { + temp *= HALF; + } + vquad += modelSecondDerivativesValues.getEntry(ih) * temp; + ih++; + } + } + for (int k = 0; k < npt; k++) { + // Computing 2nd power + final double d1 = work2.getEntry(k); + final double d2 = d1 * d1; // "d1" must be squared first to prevent test failures. + vquad += HALF * modelSecondDerivativesParameters.getEntry(k) * d2; + } + final double diff = f - fopt - vquad; + diffc = diffb; + diffb = diffa; + diffa = FastMath.abs(diff); + if (dnorm > rho) { + nfsav = getEvaluations(); + } + + // Pick the next value of DELTA after a trust region step. 
+ + if (ntrits > 0) { + if (vquad >= ZERO) { + throw new MathIllegalStateException(LocalizedFormats.TRUST_REGION_STEP_FAILED, vquad); + } + ratio = (f - fopt) / vquad; + final double hDelta = HALF * delta; + if (ratio <= ONE_OVER_TEN) { + // Computing MIN + delta = FastMath.min(hDelta, dnorm); + } else if (ratio <= .7) { + // Computing MAX + delta = FastMath.max(hDelta, dnorm); + } else { + // Computing MAX + delta = FastMath.max(hDelta, 2 * dnorm); + } + if (delta <= rho * 1.5) { + delta = rho; + } + + // Recalculate KNEW and DENOM if the new F is less than FOPT. + + if (f < fopt) { + final int ksav = knew; + final double densav = denom; + final double delsq = delta * delta; + scaden = ZERO; + biglsq = ZERO; + knew = 0; + for (int k = 0; k < npt; k++) { + double hdiag = ZERO; + for (int m = 0; m < nptm; m++) { + // Computing 2nd power + final double d1 = zMatrix.getEntry(k, m); + hdiag += d1 * d1; + } + // Computing 2nd power + final double d1 = lagrangeValuesAtNewPoint.getEntry(k); + final double den = beta * hdiag + d1 * d1; + distsq = ZERO; + for (int j = 0; j < n; j++) { + // Computing 2nd power + final double d2 = interpolationPoints.getEntry(k, j) - newPoint.getEntry(j); + distsq += d2 * d2; + } + // Computing MAX + // Computing 2nd power + final double d3 = distsq / delsq; + final double temp = FastMath.max(ONE, d3 * d3); + if (temp * den > scaden) { + scaden = temp * den; + knew = k; + denom = den; + } + // Computing MAX + // Computing 2nd power + final double d4 = lagrangeValuesAtNewPoint.getEntry(k); + final double d5 = temp * (d4 * d4); + biglsq = FastMath.max(biglsq, d5); + } + if (scaden <= HALF * biglsq) { + knew = ksav; + denom = densav; + } + } + } + + // Update BMAT and ZMAT, so that the KNEW-th interpolation point can be + // moved. Also update the second derivative terms of the model. 
+ + update(beta, denom, knew); + + ih = 0; + final double pqold = modelSecondDerivativesParameters.getEntry(knew); + modelSecondDerivativesParameters.setEntry(knew, ZERO); + for (int i = 0; i < n; i++) { + final double temp = pqold * interpolationPoints.getEntry(knew, i); + for (int j = 0; j <= i; j++) { + modelSecondDerivativesValues.setEntry(ih, modelSecondDerivativesValues.getEntry(ih) + temp * interpolationPoints.getEntry(knew, j)); + ih++; + } + } + for (int m = 0; m < nptm; m++) { + final double temp = diff * zMatrix.getEntry(knew, m); + for (int k = 0; k < npt; k++) { + modelSecondDerivativesParameters.setEntry(k, modelSecondDerivativesParameters.getEntry(k) + temp * zMatrix.getEntry(k, m)); + } + } + + // Include the new interpolation point, and make the changes to GOPT at + // the old XOPT that are caused by the updating of the quadratic model. + + fAtInterpolationPoints.setEntry(knew, f); + for (int i = 0; i < n; i++) { + interpolationPoints.setEntry(knew, i, newPoint.getEntry(i)); + work1.setEntry(i, bMatrix.getEntry(knew, i)); + } + for (int k = 0; k < npt; k++) { + double suma = ZERO; + for (int m = 0; m < nptm; m++) { + suma += zMatrix.getEntry(knew, m) * zMatrix.getEntry(k, m); + } + double sumb = ZERO; + for (int j = 0; j < n; j++) { + sumb += interpolationPoints.getEntry(k, j) * trustRegionCenterOffset.getEntry(j); + } + final double temp = suma * sumb; + for (int i = 0; i < n; i++) { + work1.setEntry(i, work1.getEntry(i) + temp * interpolationPoints.getEntry(k, i)); + } + } + for (int i = 0; i < n; i++) { + gradientAtTrustRegionCenter.setEntry(i, gradientAtTrustRegionCenter.getEntry(i) + diff * work1.getEntry(i)); + } + + // Update XOPT, GOPT and KOPT if the new calculated F is less than FOPT. 
+ + if (f < fopt) { + trustRegionCenterInterpolationPointIndex = knew; + xoptsq = ZERO; + ih = 0; + for (int j = 0; j < n; j++) { + trustRegionCenterOffset.setEntry(j, newPoint.getEntry(j)); + // Computing 2nd power + final double d1 = trustRegionCenterOffset.getEntry(j); + xoptsq += d1 * d1; + for (int i = 0; i <= j; i++) { + if (i < j) { + gradientAtTrustRegionCenter.setEntry(j, gradientAtTrustRegionCenter.getEntry(j) + modelSecondDerivativesValues.getEntry(ih) * trialStepPoint.getEntry(i)); + } + gradientAtTrustRegionCenter.setEntry(i, gradientAtTrustRegionCenter.getEntry(i) + modelSecondDerivativesValues.getEntry(ih) * trialStepPoint.getEntry(j)); + ih++; + } + } + for (int k = 0; k < npt; k++) { + double temp = ZERO; + for (int j = 0; j < n; j++) { + temp += interpolationPoints.getEntry(k, j) * trialStepPoint.getEntry(j); + } + temp *= modelSecondDerivativesParameters.getEntry(k); + for (int i = 0; i < n; i++) { + gradientAtTrustRegionCenter.setEntry(i, gradientAtTrustRegionCenter.getEntry(i) + temp * interpolationPoints.getEntry(k, i)); + } + } + } + + // Calculate the parameters of the least Frobenius norm interpolant to + // the current data, the gradient of this interpolant at XOPT being put + // into VLAG(NPT+I), I=1,2,...,N. 
+ + if (ntrits > 0) { + for (int k = 0; k < npt; k++) { + lagrangeValuesAtNewPoint.setEntry(k, fAtInterpolationPoints.getEntry(k) - fAtInterpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex)); + work3.setEntry(k, ZERO); + } + for (int j = 0; j < nptm; j++) { + double sum = ZERO; + for (int k = 0; k < npt; k++) { + sum += zMatrix.getEntry(k, j) * lagrangeValuesAtNewPoint.getEntry(k); + } + for (int k = 0; k < npt; k++) { + work3.setEntry(k, work3.getEntry(k) + sum * zMatrix.getEntry(k, j)); + } + } + for (int k = 0; k < npt; k++) { + double sum = ZERO; + for (int j = 0; j < n; j++) { + sum += interpolationPoints.getEntry(k, j) * trustRegionCenterOffset.getEntry(j); + } + work2.setEntry(k, work3.getEntry(k)); + work3.setEntry(k, sum * work3.getEntry(k)); + } + double gqsq = ZERO; + double gisq = ZERO; + for (int i = 0; i < n; i++) { + double sum = ZERO; + for (int k = 0; k < npt; k++) { + sum += bMatrix.getEntry(k, i) * + lagrangeValuesAtNewPoint.getEntry(k) + interpolationPoints.getEntry(k, i) * work3.getEntry(k); + } + if (trustRegionCenterOffset.getEntry(i) == lowerDifference.getEntry(i)) { + // Computing MIN + // Computing 2nd power + final double d1 = FastMath.min(ZERO, gradientAtTrustRegionCenter.getEntry(i)); + gqsq += d1 * d1; + // Computing 2nd power + final double d2 = FastMath.min(ZERO, sum); + gisq += d2 * d2; + } else if (trustRegionCenterOffset.getEntry(i) == upperDifference.getEntry(i)) { + // Computing MAX + // Computing 2nd power + final double d1 = FastMath.max(ZERO, gradientAtTrustRegionCenter.getEntry(i)); + gqsq += d1 * d1; + // Computing 2nd power + final double d2 = FastMath.max(ZERO, sum); + gisq += d2 * d2; + } else { + // Computing 2nd power + final double d1 = gradientAtTrustRegionCenter.getEntry(i); + gqsq += d1 * d1; + gisq += sum * sum; + } + lagrangeValuesAtNewPoint.setEntry(npt + i, sum); + } + + // Test whether to replace the new quadratic model by the least Frobenius + // norm interpolant, making the replacement if the 
test is satisfied. + + ++itest; + if (gqsq < TEN * gisq) { + itest = 0; + } + if (itest >= 3) { + for (int i = 0, max = FastMath.max(npt, nh); i < max; i++) { + if (i < n) { + gradientAtTrustRegionCenter.setEntry(i, lagrangeValuesAtNewPoint.getEntry(npt + i)); + } + if (i < npt) { + modelSecondDerivativesParameters.setEntry(i, work2.getEntry(i)); + } + if (i < nh) { + modelSecondDerivativesValues.setEntry(i, ZERO); + } + itest = 0; + } + } + } + + // If a trust region step has provided a sufficient decrease in F, then + // branch for another trust region calculation. The case NTRITS=0 occurs + // when the new interpolation point was reached by an alternative step. + + if (ntrits == 0) { + state = 60; break; + } + if (f <= fopt + ONE_OVER_TEN * vquad) { + state = 60; break; + } + + // Alternatively, find out if the interpolation points are close enough + // to the best point so far. + + // Computing MAX + // Computing 2nd power + final double d1 = TWO * delta; + // Computing 2nd power + final double d2 = TEN * rho; + distsq = FastMath.max(d1 * d1, d2 * d2); + } + case 650: { + printState(650); // XXX + knew = -1; + for (int k = 0; k < npt; k++) { + double sum = ZERO; + for (int j = 0; j < n; j++) { + // Computing 2nd power + final double d1 = interpolationPoints.getEntry(k, j) - trustRegionCenterOffset.getEntry(j); + sum += d1 * d1; + } + if (sum > distsq) { + knew = k; + distsq = sum; + } + } + + // If KNEW is positive, then ALTMOV finds alternative new positions for + // the KNEW-th interpolation point within distance ADELT of XOPT. It is + // reached via label 90. Otherwise, there is a branch to label 60 for + // another trust region iteration, unless the calculations with the + // current RHO are complete. 
+ + if (knew >= 0) { + final double dist = FastMath.sqrt(distsq); + if (ntrits == -1) { + // Computing MIN + delta = FastMath.min(ONE_OVER_TEN * delta, HALF * dist); + if (delta <= rho * 1.5) { + delta = rho; + } + } + ntrits = 0; + // Computing MAX + // Computing MIN + final double d1 = FastMath.min(ONE_OVER_TEN * dist, delta); + adelt = FastMath.max(d1, rho); + dsq = adelt * adelt; + state = 90; break; + } + if (ntrits == -1) { + state = 680; break; + } + if (ratio > ZERO) { + state = 60; break; + } + if (FastMath.max(delta, dnorm) > rho) { + state = 60; break; + } + + // The calculations with the current value of RHO are complete. Pick the + // next values of RHO and DELTA. + } + case 680: { + printState(680); // XXX + if (rho > stoppingTrustRegionRadius) { + delta = HALF * rho; + ratio = rho / stoppingTrustRegionRadius; + if (ratio <= SIXTEEN) { + rho = stoppingTrustRegionRadius; + } else if (ratio <= TWO_HUNDRED_FIFTY) { + rho = FastMath.sqrt(ratio) * stoppingTrustRegionRadius; + } else { + rho *= ONE_OVER_TEN; + } + delta = FastMath.max(delta, rho); + ntrits = 0; + nfsav = getEvaluations(); + state = 60; break; + } + + // Return from the calculation, after another Newton-Raphson step, if + // it is too short to have been tried before. 
+ + if (ntrits == -1) { + state = 360; break; + } + } + case 720: { + printState(720); // XXX + if (fAtInterpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex) <= fsave) { + for (int i = 0; i < n; i++) { + // Computing MIN + // Computing MAX + final double d3 = lowerBound[i]; + final double d4 = originShift.getEntry(i) + trustRegionCenterOffset.getEntry(i); + final double d1 = FastMath.max(d3, d4); + final double d2 = upperBound[i]; + currentBest.setEntry(i, FastMath.min(d1, d2)); + if (trustRegionCenterOffset.getEntry(i) == lowerDifference.getEntry(i)) { + currentBest.setEntry(i, lowerBound[i]); + } + if (trustRegionCenterOffset.getEntry(i) == upperDifference.getEntry(i)) { + currentBest.setEntry(i, upperBound[i]); + } + } + f = fAtInterpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex); + } + return f; + } + default: { + throw new MathIllegalStateException(LocalizedFormats.SIMPLE_MESSAGE, "bobyqb"); + }}} + } // bobyqb + + // ---------------------------------------------------------------------------------------- + + /** + * The arguments N, NPT, XPT, XOPT, BMAT, ZMAT, NDIM, SL and SU all have + * the same meanings as the corresponding arguments of BOBYQB. + * KOPT is the index of the optimal interpolation point. + * KNEW is the index of the interpolation point that is going to be moved. + * ADELT is the current trust region bound. + * XNEW will be set to a suitable new position for the interpolation point + * XPT(KNEW,.). Specifically, it satisfies the SL, SU and trust region + * bounds and it should provide a large denominator in the next call of + * UPDATE. The step XNEW-XOPT from XOPT is restricted to moves along the + * straight lines through XOPT and another interpolation point. 
+ * XALT also provides a large value of the modulus of the KNEW-th Lagrange + * function subject to the constraints that have been mentioned, its main + * difference from XNEW being that XALT-XOPT is a constrained version of + * the Cauchy step within the trust region. An exception is that XALT is + * not calculated if all components of GLAG (see below) are zero. + * ALPHA will be set to the KNEW-th diagonal element of the H matrix. + * CAUCHY will be set to the square of the KNEW-th Lagrange function at + * the step XALT-XOPT from XOPT for the vector XALT that is returned, + * except that CAUCHY is set to zero if XALT is not calculated. + * GLAG is a working space vector of length N for the gradient of the + * KNEW-th Lagrange function at XOPT. + * HCOL is a working space vector of length NPT for the second derivative + * coefficients of the KNEW-th Lagrange function. + * W is a working space vector of length 2N that is going to hold the + * constrained Cauchy step from XOPT of the Lagrange function, followed + * by the downhill version of XALT when the uphill step is calculated. + * + * Set the first NPT components of W to the leading elements of the + * KNEW-th column of the H matrix. 
+ * @param knew + * @param adelt + */ + private double[] altmov( + int knew, + double adelt + ) { + printMethod(); // XXX + + final int n = currentBest.getDimension(); + final int npt = numberOfInterpolationPoints; + + final ArrayRealVector glag = new ArrayRealVector(n); + final ArrayRealVector hcol = new ArrayRealVector(npt); + + final ArrayRealVector work1 = new ArrayRealVector(n); + final ArrayRealVector work2 = new ArrayRealVector(n); + + for (int k = 0; k < npt; k++) { + hcol.setEntry(k, ZERO); + } + for (int j = 0, max = npt - n - 1; j < max; j++) { + final double tmp = zMatrix.getEntry(knew, j); + for (int k = 0; k < npt; k++) { + hcol.setEntry(k, hcol.getEntry(k) + tmp * zMatrix.getEntry(k, j)); + } + } + final double alpha = hcol.getEntry(knew); + final double ha = HALF * alpha; + + // Calculate the gradient of the KNEW-th Lagrange function at XOPT. + + for (int i = 0; i < n; i++) { + glag.setEntry(i, bMatrix.getEntry(knew, i)); + } + for (int k = 0; k < npt; k++) { + double tmp = ZERO; + for (int j = 0; j < n; j++) { + tmp += interpolationPoints.getEntry(k, j) * trustRegionCenterOffset.getEntry(j); + } + tmp *= hcol.getEntry(k); + for (int i = 0; i < n; i++) { + glag.setEntry(i, glag.getEntry(i) + tmp * interpolationPoints.getEntry(k, i)); + } + } + + // Search for a large denominator along the straight lines through XOPT + // and another interpolation point. SLBD and SUBD will be lower and upper + // bounds on the step along each of these lines in turn. PREDSQ will be + // set to the square of the predicted denominator for each line. PRESAV + // will be set to the largest admissible value of PREDSQ that occurs. 
+ + double presav = ZERO; + double step = Double.NaN; + int ksav = 0; + int ibdsav = 0; + double stpsav = 0; + for (int k = 0; k < npt; k++) { + if (k == trustRegionCenterInterpolationPointIndex) { + continue; + } + double dderiv = ZERO; + double distsq = ZERO; + for (int i = 0; i < n; i++) { + final double tmp = interpolationPoints.getEntry(k, i) - trustRegionCenterOffset.getEntry(i); + dderiv += glag.getEntry(i) * tmp; + distsq += tmp * tmp; + } + double subd = adelt / FastMath.sqrt(distsq); + double slbd = -subd; + int ilbd = 0; + int iubd = 0; + final double sumin = FastMath.min(ONE, subd); + + // Revise SLBD and SUBD if necessary because of the bounds in SL and SU. + + for (int i = 0; i < n; i++) { + final double tmp = interpolationPoints.getEntry(k, i) - trustRegionCenterOffset.getEntry(i); + if (tmp > ZERO) { + if (slbd * tmp < lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) { + slbd = (lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) / tmp; + ilbd = -i - 1; + } + if (subd * tmp > upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) { + // Computing MAX + subd = FastMath.max(sumin, + (upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) / tmp); + iubd = i + 1; + } + } else if (tmp < ZERO) { + if (slbd * tmp > upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) { + slbd = (upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) / tmp; + ilbd = i + 1; + } + if (subd * tmp < lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) { + // Computing MAX + subd = FastMath.max(sumin, + (lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)) / tmp); + iubd = -i - 1; + } + } + } + + // Seek a large modulus of the KNEW-th Lagrange function when the index + // of the other interpolation point on the line through XOPT is KNEW. 
+ + step = slbd; + int isbd = ilbd; + double vlag = Double.NaN; + if (k == knew) { + final double diff = dderiv - ONE; + vlag = slbd * (dderiv - slbd * diff); + final double d1 = subd * (dderiv - subd * diff); + if (FastMath.abs(d1) > FastMath.abs(vlag)) { + step = subd; + vlag = d1; + isbd = iubd; + } + final double d2 = HALF * dderiv; + final double d3 = d2 - diff * slbd; + final double d4 = d2 - diff * subd; + if (d3 * d4 < ZERO) { + final double d5 = d2 * d2 / diff; + if (FastMath.abs(d5) > FastMath.abs(vlag)) { + step = d2 / diff; + vlag = d5; + isbd = 0; + } + } + + // Search along each of the other lines through XOPT and another point. + + } else { + vlag = slbd * (ONE - slbd); + final double tmp = subd * (ONE - subd); + if (FastMath.abs(tmp) > FastMath.abs(vlag)) { + step = subd; + vlag = tmp; + isbd = iubd; + } + if (subd > HALF && FastMath.abs(vlag) < ONE_OVER_FOUR) { + step = HALF; + vlag = ONE_OVER_FOUR; + isbd = 0; + } + vlag *= dderiv; + } + + // Calculate PREDSQ for the current line search and maintain PRESAV. + + final double tmp = step * (ONE - step) * distsq; + final double predsq = vlag * vlag * (vlag * vlag + ha * tmp * tmp); + if (predsq > presav) { + presav = predsq; + ksav = k; + stpsav = step; + ibdsav = isbd; + } + } + + // Construct XNEW in a way that satisfies the bound constraints exactly. + + for (int i = 0; i < n; i++) { + final double tmp = trustRegionCenterOffset.getEntry(i) + stpsav * (interpolationPoints.getEntry(ksav, i) - trustRegionCenterOffset.getEntry(i)); + newPoint.setEntry(i, FastMath.max(lowerDifference.getEntry(i), + FastMath.min(upperDifference.getEntry(i), tmp))); + } + if (ibdsav < 0) { + newPoint.setEntry(-ibdsav - 1, lowerDifference.getEntry(-ibdsav - 1)); + } + if (ibdsav > 0) { + newPoint.setEntry(ibdsav - 1, upperDifference.getEntry(ibdsav - 1)); + } + + // Prepare for the iterative method that assembles the constrained Cauchy + // step in W. 
The sum of squares of the fixed components of W is formed in + // WFIXSQ, and the free components of W are set to BIGSTP. + + final double bigstp = adelt + adelt; + int iflag = 0; + double cauchy = Double.NaN; + double csave = ZERO; + while (true) { + double wfixsq = ZERO; + double ggfree = ZERO; + for (int i = 0; i < n; i++) { + final double glagValue = glag.getEntry(i); + work1.setEntry(i, ZERO); + if (FastMath.min(trustRegionCenterOffset.getEntry(i) - lowerDifference.getEntry(i), glagValue) > ZERO || + FastMath.max(trustRegionCenterOffset.getEntry(i) - upperDifference.getEntry(i), glagValue) < ZERO) { + work1.setEntry(i, bigstp); + // Computing 2nd power + ggfree += glagValue * glagValue; + } + } + if (ggfree == ZERO) { + return new double[] { alpha, ZERO }; + } + + // Investigate whether more components of W can be fixed. + final double tmp1 = adelt * adelt - wfixsq; + if (tmp1 > ZERO) { + step = FastMath.sqrt(tmp1 / ggfree); + ggfree = ZERO; + for (int i = 0; i < n; i++) { + if (work1.getEntry(i) == bigstp) { + final double tmp2 = trustRegionCenterOffset.getEntry(i) - step * glag.getEntry(i); + if (tmp2 <= lowerDifference.getEntry(i)) { + work1.setEntry(i, lowerDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + // Computing 2nd power + final double d1 = work1.getEntry(i); + wfixsq += d1 * d1; + } else if (tmp2 >= upperDifference.getEntry(i)) { + work1.setEntry(i, upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + // Computing 2nd power + final double d1 = work1.getEntry(i); + wfixsq += d1 * d1; + } else { + // Computing 2nd power + final double d1 = glag.getEntry(i); + ggfree += d1 * d1; + } + } + } + } + + // Set the remaining free components of W and all components of XALT, + // except that W may be scaled later. 
+ + double gw = ZERO; + for (int i = 0; i < n; i++) { + final double glagValue = glag.getEntry(i); + if (work1.getEntry(i) == bigstp) { + work1.setEntry(i, -step * glagValue); + final double min = FastMath.min(upperDifference.getEntry(i), + trustRegionCenterOffset.getEntry(i) + work1.getEntry(i)); + alternativeNewPoint.setEntry(i, FastMath.max(lowerDifference.getEntry(i), min)); + } else if (work1.getEntry(i) == ZERO) { + alternativeNewPoint.setEntry(i, trustRegionCenterOffset.getEntry(i)); + } else if (glagValue > ZERO) { + alternativeNewPoint.setEntry(i, lowerDifference.getEntry(i)); + } else { + alternativeNewPoint.setEntry(i, upperDifference.getEntry(i)); + } + gw += glagValue * work1.getEntry(i); + } + + // Set CURV to the curvature of the KNEW-th Lagrange function along W. + // Scale W by a factor less than one if that can reduce the modulus of + // the Lagrange function at XOPT+W. Set CAUCHY to the final value of + // the square of this function. + + double curv = ZERO; + for (int k = 0; k < npt; k++) { + double tmp = ZERO; + for (int j = 0; j < n; j++) { + tmp += interpolationPoints.getEntry(k, j) * work1.getEntry(j); + } + curv += hcol.getEntry(k) * tmp * tmp; + } + if (iflag == 1) { + curv = -curv; + } + if (curv > -gw && + curv < -gw * (ONE + FastMath.sqrt(TWO))) { + final double scale = -gw / curv; + for (int i = 0; i < n; i++) { + final double tmp = trustRegionCenterOffset.getEntry(i) + scale * work1.getEntry(i); + alternativeNewPoint.setEntry(i, FastMath.max(lowerDifference.getEntry(i), + FastMath.min(upperDifference.getEntry(i), tmp))); + } + // Computing 2nd power + final double d1 = HALF * gw * scale; + cauchy = d1 * d1; + } else { + // Computing 2nd power + final double d1 = gw + HALF * curv; + cauchy = d1 * d1; + } + + // If IFLAG is zero, then XALT is calculated as before after reversing + // the sign of GLAG. Thus two XALT vectors become available. The one that + // is chosen is the one that gives the larger value of CAUCHY. 
+ + if (iflag == 0) { + for (int i = 0; i < n; i++) { + glag.setEntry(i, -glag.getEntry(i)); + work2.setEntry(i, alternativeNewPoint.getEntry(i)); + } + csave = cauchy; + iflag = 1; + } else { + break; + } + } + if (csave > cauchy) { + for (int i = 0; i < n; i++) { + alternativeNewPoint.setEntry(i, work2.getEntry(i)); + } + cauchy = csave; + } + + return new double[] { alpha, cauchy }; + } // altmov + + // ---------------------------------------------------------------------------------------- + + /** + * SUBROUTINE PRELIM sets the elements of XBASE, XPT, FVAL, GOPT, HQ, PQ, + * BMAT and ZMAT for the first iteration, and it maintains the values of + * NF and KOPT. The vector X is also changed by PRELIM. + * + * The arguments N, NPT, X, XL, XU, RHOBEG, IPRINT and MAXFUN are the + * same as the corresponding arguments in SUBROUTINE BOBYQA. + * The arguments XBASE, XPT, FVAL, HQ, PQ, BMAT, ZMAT, NDIM, SL and SU + * are the same as the corresponding arguments in BOBYQB, the elements + * of SL and SU being set in BOBYQA. + * GOPT is usually the gradient of the quadratic model at XOPT+XBASE, but + * it is set by PRELIM to the gradient of the quadratic model at XBASE. + * If XOPT is nonzero, BOBYQB will change it to its usual value later. + * NF is maintaned as the number of calls of CALFUN so far. + * KOPT will be such that the least calculated value of F so far is at + * the point XPT(KOPT,.)+XBASE in the space of the variables. + * + * @param lowerBound Lower bounds. + * @param upperBound Upper bounds. 
+ */ + private void prelim(double[] lowerBound, + double[] upperBound) { + printMethod(); // XXX + + final int n = currentBest.getDimension(); + final int npt = numberOfInterpolationPoints; + final int ndim = bMatrix.getRowDimension(); + + final double rhosq = initialTrustRegionRadius * initialTrustRegionRadius; + final double recip = 1d / rhosq; + final int np = n + 1; + + // Set XBASE to the initial vector of variables, and set the initial + // elements of XPT, BMAT, HQ, PQ and ZMAT to zero. + + for (int j = 0; j < n; j++) { + originShift.setEntry(j, currentBest.getEntry(j)); + for (int k = 0; k < npt; k++) { + interpolationPoints.setEntry(k, j, ZERO); + } + for (int i = 0; i < ndim; i++) { + bMatrix.setEntry(i, j, ZERO); + } + } + for (int i = 0, max = n * np / 2; i < max; i++) { + modelSecondDerivativesValues.setEntry(i, ZERO); + } + for (int k = 0; k < npt; k++) { + modelSecondDerivativesParameters.setEntry(k, ZERO); + for (int j = 0, max = npt - np; j < max; j++) { + zMatrix.setEntry(k, j, ZERO); + } + } + + // Begin the initialization procedure. NF becomes one more than the number + // of function values so far. The coordinates of the displacement of the + // next initial interpolation point from XBASE are set in XPT(NF+1,.). 
+ + int ipt = 0; + int jpt = 0; + double fbeg = Double.NaN; + do { + final int nfm = getEvaluations(); + final int nfx = nfm - n; + final int nfmm = nfm - 1; + final int nfxm = nfx - 1; + double stepa = 0; + double stepb = 0; + if (nfm <= 2 * n) { + if (nfm >= 1 && + nfm <= n) { + stepa = initialTrustRegionRadius; + if (upperDifference.getEntry(nfmm) == ZERO) { + stepa = -stepa; + // throw new PathIsExploredException(); // XXX + } + interpolationPoints.setEntry(nfm, nfmm, stepa); + } else if (nfm > n) { + stepa = interpolationPoints.getEntry(nfx, nfxm); + stepb = -initialTrustRegionRadius; + if (lowerDifference.getEntry(nfxm) == ZERO) { + stepb = FastMath.min(TWO * initialTrustRegionRadius, upperDifference.getEntry(nfxm)); + // throw new PathIsExploredException(); // XXX + } + if (upperDifference.getEntry(nfxm) == ZERO) { + stepb = FastMath.max(-TWO * initialTrustRegionRadius, lowerDifference.getEntry(nfxm)); + // throw new PathIsExploredException(); // XXX + } + interpolationPoints.setEntry(nfm, nfxm, stepb); + } + } else { + final int tmp1 = (nfm - np) / n; + jpt = nfm - tmp1 * n - n; + ipt = jpt + tmp1; + if (ipt > n) { + final int tmp2 = jpt; + jpt = ipt - n; + ipt = tmp2; +// throw new PathIsExploredException(); // XXX + } + final int iptMinus1 = ipt - 1; + final int jptMinus1 = jpt - 1; + interpolationPoints.setEntry(nfm, iptMinus1, interpolationPoints.getEntry(ipt, iptMinus1)); + interpolationPoints.setEntry(nfm, jptMinus1, interpolationPoints.getEntry(jpt, jptMinus1)); + } + + // Calculate the next value of F. The least function value so far and + // its index are required. 
+ + for (int j = 0; j < n; j++) { + currentBest.setEntry(j, FastMath.min(FastMath.max(lowerBound[j], + originShift.getEntry(j) + interpolationPoints.getEntry(nfm, j)), + upperBound[j])); + if (interpolationPoints.getEntry(nfm, j) == lowerDifference.getEntry(j)) { + currentBest.setEntry(j, lowerBound[j]); + } + if (interpolationPoints.getEntry(nfm, j) == upperDifference.getEntry(j)) { + currentBest.setEntry(j, upperBound[j]); + } + } + + final double objectiveValue = computeObjectiveValue(currentBest.toArray()); + final double f = isMinimize ? objectiveValue : -objectiveValue; + final int numEval = getEvaluations(); // nfm + 1 + fAtInterpolationPoints.setEntry(nfm, f); + + if (numEval == 1) { + fbeg = f; + trustRegionCenterInterpolationPointIndex = 0; + } else if (f < fAtInterpolationPoints.getEntry(trustRegionCenterInterpolationPointIndex)) { + trustRegionCenterInterpolationPointIndex = nfm; + } + + // Set the nonzero initial elements of BMAT and the quadratic model in the + // cases when NF is at most 2*N+1. If NF exceeds N+1, then the positions + // of the NF-th and (NF-N)-th interpolation points may be switched, in + // order that the function value at the first of them contributes to the + // off-diagonal second derivative terms of the initial quadratic model. 
+ + if (numEval <= 2 * n + 1) { + if (numEval >= 2 && + numEval <= n + 1) { + gradientAtTrustRegionCenter.setEntry(nfmm, (f - fbeg) / stepa); + if (npt < numEval + n) { + final double oneOverStepA = ONE / stepa; + bMatrix.setEntry(0, nfmm, -oneOverStepA); + bMatrix.setEntry(nfm, nfmm, oneOverStepA); + bMatrix.setEntry(npt + nfmm, nfmm, -HALF * rhosq); + // throw new PathIsExploredException(); // XXX + } + } else if (numEval >= n + 2) { + final int ih = nfx * (nfx + 1) / 2 - 1; + final double tmp = (f - fbeg) / stepb; + final double diff = stepb - stepa; + modelSecondDerivativesValues.setEntry(ih, TWO * (tmp - gradientAtTrustRegionCenter.getEntry(nfxm)) / diff); + gradientAtTrustRegionCenter.setEntry(nfxm, (gradientAtTrustRegionCenter.getEntry(nfxm) * stepb - tmp * stepa) / diff); + if (stepa * stepb < ZERO && f < fAtInterpolationPoints.getEntry(nfm - n)) { + fAtInterpolationPoints.setEntry(nfm, fAtInterpolationPoints.getEntry(nfm - n)); + fAtInterpolationPoints.setEntry(nfm - n, f); + if (trustRegionCenterInterpolationPointIndex == nfm) { + trustRegionCenterInterpolationPointIndex = nfm - n; + } + interpolationPoints.setEntry(nfm - n, nfxm, stepb); + interpolationPoints.setEntry(nfm, nfxm, stepa); + } + bMatrix.setEntry(0, nfxm, -(stepa + stepb) / (stepa * stepb)); + bMatrix.setEntry(nfm, nfxm, -HALF / interpolationPoints.getEntry(nfm - n, nfxm)); + bMatrix.setEntry(nfm - n, nfxm, + -bMatrix.getEntry(0, nfxm) - bMatrix.getEntry(nfm, nfxm)); + zMatrix.setEntry(0, nfxm, FastMath.sqrt(TWO) / (stepa * stepb)); + zMatrix.setEntry(nfm, nfxm, FastMath.sqrt(HALF) / rhosq); + // zMatrix.setEntry(nfm, nfxm, FastMath.sqrt(HALF) * recip); // XXX "testAckley" and "testDiffPow" fail. + zMatrix.setEntry(nfm - n, nfxm, + -zMatrix.getEntry(0, nfxm) - zMatrix.getEntry(nfm, nfxm)); + } + + // Set the off-diagonal second derivatives of the Lagrange functions and + // the initial quadratic model. 
+ + } else { + zMatrix.setEntry(0, nfxm, recip); + zMatrix.setEntry(nfm, nfxm, recip); + zMatrix.setEntry(ipt, nfxm, -recip); + zMatrix.setEntry(jpt, nfxm, -recip); + + final int ih = ipt * (ipt - 1) / 2 + jpt - 1; + final double tmp = interpolationPoints.getEntry(nfm, ipt - 1) * interpolationPoints.getEntry(nfm, jpt - 1); + modelSecondDerivativesValues.setEntry(ih, (fbeg - fAtInterpolationPoints.getEntry(ipt) - fAtInterpolationPoints.getEntry(jpt) + f) / tmp); +// throw new PathIsExploredException(); // XXX + } + } while (getEvaluations() < npt); + } // prelim + + + // ---------------------------------------------------------------------------------------- + + /** + * A version of the truncated conjugate gradient is applied. If a line + * search is restricted by a constraint, then the procedure is restarted, + * the values of the variables that are at their bounds being fixed. If + * the trust region boundary is reached, then further changes may be made + * to D, each one being in the two dimensional space that is spanned + * by the current D and the gradient of Q at XOPT+D, staying on the trust + * region boundary. Termination occurs when the reduction in Q seems to + * be close to the greatest reduction that can be achieved. + * The arguments N, NPT, XPT, XOPT, GOPT, HQ, PQ, SL and SU have the same + * meanings as the corresponding arguments of BOBYQB. + * DELTA is the trust region radius for the present calculation, which + * seeks a small value of the quadratic model within distance DELTA of + * XOPT subject to the bounds on the variables. + * XNEW will be set to a new vector of variables that is approximately + * the one that minimizes the quadratic model within the trust region + * subject to the SL and SU constraints on the variables. It satisfies + * as equations the bounds that become active during the calculation. + * D is the calculated trial step from XOPT, generated iteratively from an + * initial value of zero. 
Thus XNEW is XOPT+D after the final iteration. + * GNEW holds the gradient of the quadratic model at XOPT+D. It is updated + * when D is updated. + * XBDI is a working space vector. For I=1,2,...,N, the element XBDI(I) is + * set to -1.0, 0.0, or 1.0, the value being nonzero if and only if the + * I-th variable has become fixed at a bound, the bound being SL(I) or + * SU(I) in the case XBDI(I)=-1.0 or XBDI(I)=1.0, respectively. This + * information is accumulated during the construction of XNEW. + * The arrays S, HS and HRED are also used for working space. They hold the + * current search direction, and the changes in the gradient of Q along S + * and the reduced D, respectively, where the reduced D is the same as D, + * except that the components of the fixed variables are zero. + * DSQ will be set to the square of the length of XNEW-XOPT. + * CRVMIN is set to zero if D reaches the trust region boundary. Otherwise + * it is set to the least curvature of H that occurs in the conjugate + * gradient searches that are not restricted by any constraints. The + * value CRVMIN=-1.0D0 is set, however, if all of these searches are + * constrained. 
+ * @param delta + * @param gnew + * @param xbdi + * @param s + * @param hs + * @param hred + */ + private double[] trsbox( + double delta, + ArrayRealVector gnew, + ArrayRealVector xbdi, + ArrayRealVector s, + ArrayRealVector hs, + ArrayRealVector hred + ) { + printMethod(); // XXX + + final int n = currentBest.getDimension(); + final int npt = numberOfInterpolationPoints; + + double dsq = Double.NaN; + double crvmin = Double.NaN; + + // Local variables + double ds; + int iu; + double dhd, dhs, cth, shs, sth, ssq, beta=0, sdec, blen; + int iact = -1; + int nact = 0; + double angt = 0, qred; + int isav; + double temp = 0, xsav = 0, xsum = 0, angbd = 0, dredg = 0, sredg = 0; + int iterc; + double resid = 0, delsq = 0, ggsav = 0, tempa = 0, tempb = 0, + redmax = 0, dredsq = 0, redsav = 0, gredsq = 0, rednew = 0; + int itcsav = 0; + double rdprev = 0, rdnext = 0, stplen = 0, stepsq = 0; + int itermax = 0; + + // Set some constants. + + // Function Body + + // The sign of GOPT(I) gives the sign of the change to the I-th variable + // that will reduce Q from its value at XOPT. Thus xbdi.get((I) shows whether + // or not to fix the I-th variable at one of its bounds initially, with + // NACT being set to the number of fixed variables. D and GNEW are also + // set for the first iteration. DELSQ is the upper bound on the sum of + // squares of the free variables. QRED is the reduction in Q so far. 
+ + iterc = 0; + nact = 0; + for (int i = 0; i < n; i++) { + xbdi.setEntry(i, ZERO); + if (trustRegionCenterOffset.getEntry(i) <= lowerDifference.getEntry(i)) { + if (gradientAtTrustRegionCenter.getEntry(i) >= ZERO) { + xbdi.setEntry(i, MINUS_ONE); + } + } else if (trustRegionCenterOffset.getEntry(i) >= upperDifference.getEntry(i) && + gradientAtTrustRegionCenter.getEntry(i) <= ZERO) { + xbdi.setEntry(i, ONE); + } + if (xbdi.getEntry(i) != ZERO) { + ++nact; + } + trialStepPoint.setEntry(i, ZERO); + gnew.setEntry(i, gradientAtTrustRegionCenter.getEntry(i)); + } + delsq = delta * delta; + qred = ZERO; + crvmin = MINUS_ONE; + + // Set the next search direction of the conjugate gradient method. It is + // the steepest descent direction initially and when the iterations are + // restarted because a variable has just been fixed by a bound, and of + // course the components of the fixed variables are zero. ITERMAX is an + // upper bound on the indices of the conjugate gradient iterations. + + int state = 20; + for(;;) { + switch (state) { + case 20: { + printState(20); // XXX + beta = ZERO; + } + case 30: { + printState(30); // XXX + stepsq = ZERO; + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) != ZERO) { + s.setEntry(i, ZERO); + } else if (beta == ZERO) { + s.setEntry(i, -gnew.getEntry(i)); + } else { + s.setEntry(i, beta * s.getEntry(i) - gnew.getEntry(i)); + } + // Computing 2nd power + final double d1 = s.getEntry(i); + stepsq += d1 * d1; + } + if (stepsq == ZERO) { + state = 190; break; + } + if (beta == ZERO) { + gredsq = stepsq; + itermax = iterc + n - nact; + } + if (gredsq * delsq <= qred * 1e-4 * qred) { + state = 190; break; + } + + // Multiply the search direction by the second derivative matrix of Q and + // calculate some scalars for the choice of steplength. Then set BLEN to + // the length of the the step to the trust region boundary and STPLEN to + // the steplength, ignoring the simple bounds. 
+ + state = 210; break; + } + case 50: { + printState(50); // XXX + resid = delsq; + ds = ZERO; + shs = ZERO; + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) == ZERO) { + // Computing 2nd power + final double d1 = trialStepPoint.getEntry(i); + resid -= d1 * d1; + ds += s.getEntry(i) * trialStepPoint.getEntry(i); + shs += s.getEntry(i) * hs.getEntry(i); + } + } + if (resid <= ZERO) { + state = 90; break; + } + temp = FastMath.sqrt(stepsq * resid + ds * ds); + if (ds < ZERO) { + blen = (temp - ds) / stepsq; + } else { + blen = resid / (temp + ds); + } + stplen = blen; + if (shs > ZERO) { + // Computing MIN + stplen = FastMath.min(blen, gredsq / shs); + } + + // Reduce STPLEN if necessary in order to preserve the simple bounds, + // letting IACT be the index of the new constrained variable. + + iact = -1; + for (int i = 0; i < n; i++) { + if (s.getEntry(i) != ZERO) { + xsum = trustRegionCenterOffset.getEntry(i) + trialStepPoint.getEntry(i); + if (s.getEntry(i) > ZERO) { + temp = (upperDifference.getEntry(i) - xsum) / s.getEntry(i); + } else { + temp = (lowerDifference.getEntry(i) - xsum) / s.getEntry(i); + } + if (temp < stplen) { + stplen = temp; + iact = i; + } + } + } + + // Update CRVMIN, GNEW and D. Set SDEC to the decrease that occurs in Q. 
+ + sdec = ZERO; + if (stplen > ZERO) { + ++iterc; + temp = shs / stepsq; + if (iact == -1 && temp > ZERO) { + crvmin = FastMath.min(crvmin,temp); + if (crvmin == MINUS_ONE) { + crvmin = temp; + } + } + ggsav = gredsq; + gredsq = ZERO; + for (int i = 0; i < n; i++) { + gnew.setEntry(i, gnew.getEntry(i) + stplen * hs.getEntry(i)); + if (xbdi.getEntry(i) == ZERO) { + // Computing 2nd power + final double d1 = gnew.getEntry(i); + gredsq += d1 * d1; + } + trialStepPoint.setEntry(i, trialStepPoint.getEntry(i) + stplen * s.getEntry(i)); + } + // Computing MAX + final double d1 = stplen * (ggsav - HALF * stplen * shs); + sdec = FastMath.max(d1, ZERO); + qred += sdec; + } + + // Restart the conjugate gradient method if it has hit a new bound. + + if (iact >= 0) { + ++nact; + xbdi.setEntry(iact, ONE); + if (s.getEntry(iact) < ZERO) { + xbdi.setEntry(iact, MINUS_ONE); + } + // Computing 2nd power + final double d1 = trialStepPoint.getEntry(iact); + delsq -= d1 * d1; + if (delsq <= ZERO) { + state = 190; break; + } + state = 20; break; + } + + // If STPLEN is less than BLEN, then either apply another conjugate + // gradient iteration or RETURN. + + if (stplen < blen) { + if (iterc == itermax) { + state = 190; break; + } + if (sdec <= qred * .01) { + state = 190; break; + } + beta = gredsq / ggsav; + state = 30; break; + } + } + case 90: { + printState(90); // XXX + crvmin = ZERO; + + // Prepare for the alternative iteration by calculating some scalars + // and by multiplying the reduced D by the second derivative matrix of + // Q, where S holds the reduced D in the call of GGMULT. 
+ + } + case 100: { + printState(100); // XXX + if (nact >= n - 1) { + state = 190; break; + } + dredsq = ZERO; + dredg = ZERO; + gredsq = ZERO; + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) == ZERO) { + // Computing 2nd power + double d1 = trialStepPoint.getEntry(i); + dredsq += d1 * d1; + dredg += trialStepPoint.getEntry(i) * gnew.getEntry(i); + // Computing 2nd power + d1 = gnew.getEntry(i); + gredsq += d1 * d1; + s.setEntry(i, trialStepPoint.getEntry(i)); + } else { + s.setEntry(i, ZERO); + } + } + itcsav = iterc; + state = 210; break; + // Let the search direction S be a linear combination of the reduced D + // and the reduced G that is orthogonal to the reduced D. + } + case 120: { + printState(120); // XXX + ++iterc; + temp = gredsq * dredsq - dredg * dredg; + if (temp <= qred * 1e-4 * qred) { + state = 190; break; + } + temp = FastMath.sqrt(temp); + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) == ZERO) { + s.setEntry(i, (dredg * trialStepPoint.getEntry(i) - dredsq * gnew.getEntry(i)) / temp); + } else { + s.setEntry(i, ZERO); + } + } + sredg = -temp; + + // By considering the simple bounds on the variables, calculate an upper + // bound on the tangent of half the angle of the alternative iteration, + // namely ANGBD, except that, if already a free variable has reached a + // bound, there is a branch back to label 100 after fixing that variable. 
+ + angbd = ONE; + iact = -1; + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) == ZERO) { + tempa = trustRegionCenterOffset.getEntry(i) + trialStepPoint.getEntry(i) - lowerDifference.getEntry(i); + tempb = upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i) - trialStepPoint.getEntry(i); + if (tempa <= ZERO) { + ++nact; + xbdi.setEntry(i, MINUS_ONE); + state = 100; break; + } else if (tempb <= ZERO) { + ++nact; + xbdi.setEntry(i, ONE); + state = 100; break; + } + // Computing 2nd power + double d1 = trialStepPoint.getEntry(i); + // Computing 2nd power + double d2 = s.getEntry(i); + ssq = d1 * d1 + d2 * d2; + // Computing 2nd power + d1 = trustRegionCenterOffset.getEntry(i) - lowerDifference.getEntry(i); + temp = ssq - d1 * d1; + if (temp > ZERO) { + temp = FastMath.sqrt(temp) - s.getEntry(i); + if (angbd * temp > tempa) { + angbd = tempa / temp; + iact = i; + xsav = MINUS_ONE; + } + } + // Computing 2nd power + d1 = upperDifference.getEntry(i) - trustRegionCenterOffset.getEntry(i); + temp = ssq - d1 * d1; + if (temp > ZERO) { + temp = FastMath.sqrt(temp) + s.getEntry(i); + if (angbd * temp > tempb) { + angbd = tempb / temp; + iact = i; + xsav = ONE; + } + } + } + } + + // Calculate HHD and some curvatures for the alternative iteration. + + state = 210; break; + } + case 150: { + printState(150); // XXX + shs = ZERO; + dhs = ZERO; + dhd = ZERO; + for (int i = 0; i < n; i++) { + if (xbdi.getEntry(i) == ZERO) { + shs += s.getEntry(i) * hs.getEntry(i); + dhs += trialStepPoint.getEntry(i) * hs.getEntry(i); + dhd += trialStepPoint.getEntry(i) * hred.getEntry(i); + } + } + + // Seek the greatest reduction in Q for a range of equally spaced values + // of ANGT in [0,ANGBD], where ANGT is the tangent of half the angle of + // the alternative iteration. + + redmax = ZERO; + isav = -1; + redsav = ZERO; + iu = (int) (angbd * 17. 
+ 3.1); + for (int i = 0; i < iu; i++) { + angt = angbd * i / iu; + sth = (angt + angt) / (ONE + angt * angt); + temp = shs + angt * (angt * dhd - dhs - dhs); + rednew = sth * (angt * dredg - sredg - HALF * sth * temp); + if (rednew > redmax) { + redmax = rednew; + isav = i; + rdprev = redsav; + } else if (i == isav + 1) { + rdnext = rednew; + } + redsav = rednew; + } + + // Return if the reduction is zero. Otherwise, set the sine and cosine + // of the angle of the alternative iteration, and calculate SDEC. + + if (isav < 0) { + state = 190; break; + } + if (isav < iu) { + temp = (rdnext - rdprev) / (redmax + redmax - rdprev - rdnext); + angt = angbd * (isav + HALF * temp) / iu; + } + cth = (ONE - angt * angt) / (ONE + angt * angt); + sth = (angt + angt) / (ONE + angt * angt); + temp = shs + angt * (angt * dhd - dhs - dhs); + sdec = sth * (angt * dredg - sredg - HALF * sth * temp); + if (sdec <= ZERO) { + state = 190; break; + } + + // Update GNEW, D and HRED. If the angle of the alternative iteration + // is restricted by a bound on a free variable, that variable is fixed + // at the bound. + + dredg = ZERO; + gredsq = ZERO; + for (int i = 0; i < n; i++) { + gnew.setEntry(i, gnew.getEntry(i) + (cth - ONE) * hred.getEntry(i) + sth * hs.getEntry(i)); + if (xbdi.getEntry(i) == ZERO) { + trialStepPoint.setEntry(i, cth * trialStepPoint.getEntry(i) + sth * s.getEntry(i)); + dredg += trialStepPoint.getEntry(i) * gnew.getEntry(i); + // Computing 2nd power + final double d1 = gnew.getEntry(i); + gredsq += d1 * d1; + } + hred.setEntry(i, cth * hred.getEntry(i) + sth * hs.getEntry(i)); + } + qred += sdec; + if (iact >= 0 && isav == iu) { + ++nact; + xbdi.setEntry(iact, xsav); + state = 100; break; + } + + // If SDEC is sufficiently small, then RETURN after setting XNEW to + // XOPT+D, giving careful attention to the bounds. 
+ + if (sdec > qred * .01) { + state = 120; break; + } + } + case 190: { + printState(190); // XXX + dsq = ZERO; + for (int i = 0; i < n; i++) { + // Computing MAX + // Computing MIN + final double min = FastMath.min(trustRegionCenterOffset.getEntry(i) + trialStepPoint.getEntry(i), + upperDifference.getEntry(i)); + newPoint.setEntry(i, FastMath.max(min, lowerDifference.getEntry(i))); + if (xbdi.getEntry(i) == MINUS_ONE) { + newPoint.setEntry(i, lowerDifference.getEntry(i)); + } + if (xbdi.getEntry(i) == ONE) { + newPoint.setEntry(i, upperDifference.getEntry(i)); + } + trialStepPoint.setEntry(i, newPoint.getEntry(i) - trustRegionCenterOffset.getEntry(i)); + // Computing 2nd power + final double d1 = trialStepPoint.getEntry(i); + dsq += d1 * d1; + } + return new double[] { dsq, crvmin }; + // The following instructions multiply the current S-vector by the second + // derivative matrix of the quadratic model, putting the product in HS. + // They are reached from three different parts of the software above and + // they can be regarded as an external subroutine. 
+ } + case 210: { + printState(210); // XXX + int ih = 0; + for (int j = 0; j < n; j++) { + hs.setEntry(j, ZERO); + for (int i = 0; i <= j; i++) { + if (i < j) { + hs.setEntry(j, hs.getEntry(j) + modelSecondDerivativesValues.getEntry(ih) * s.getEntry(i)); + } + hs.setEntry(i, hs.getEntry(i) + modelSecondDerivativesValues.getEntry(ih) * s.getEntry(j)); + ih++; + } + } + final RealVector tmp = interpolationPoints.operate(s).ebeMultiply(modelSecondDerivativesParameters); + for (int k = 0; k < npt; k++) { + if (modelSecondDerivativesParameters.getEntry(k) != ZERO) { + for (int i = 0; i < n; i++) { + hs.setEntry(i, hs.getEntry(i) + tmp.getEntry(k) * interpolationPoints.getEntry(k, i)); + } + } + } + if (crvmin != ZERO) { + state = 50; break; + } + if (iterc > itcsav) { + state = 150; break; + } + for (int i = 0; i < n; i++) { + hred.setEntry(i, hs.getEntry(i)); + } + state = 120; break; + } + default: { + throw new MathIllegalStateException(LocalizedFormats.SIMPLE_MESSAGE, "trsbox"); + }} + } + } // trsbox + + // ---------------------------------------------------------------------------------------- + + /** + * The arrays BMAT and ZMAT are updated, as required by the new position + * of the interpolation point that has the index KNEW. The vector VLAG has + * N+NPT components, set on entry to the first NPT and last N components + * of the product Hw in equation (4.11) of the Powell (2006) paper on + * NEWUOA. Further, BETA is set on entry to the value of the parameter + * with that name, and DENOM is set to the denominator of the updating + * formula. Elements of ZMAT may be treated as zero if their moduli are + * at most ZTEST. The first NDIM elements of W are used for working space. 
+     * @param beta Value of the parameter BETA of the updating formula.
+     * @param denom Denominator of the updating formula.
+     * @param knew Index of the interpolation point whose position changes.
+     */
+    private void update(
+        double beta,
+        double denom,
+        int knew
+    ) {
+        printMethod(); // XXX
+
+        final int n = currentBest.getDimension();
+        final int npt = numberOfInterpolationPoints;
+        final int nptm = npt - n - 1;
+
+        // XXX Should probably be split into two arrays.
+        // Entries [0, npt) receive zMatrix(knew, 0) * zMatrix(i, 0); entries
+        // [npt, npt + n) receive row "knew" of bMatrix (see the loops below).
+        final ArrayRealVector work = new ArrayRealVector(npt + n);
+
+        // ztest = 1e-20 times the largest modulus found in zMatrix; entries of
+        // row "knew" that are not larger than this are zeroed without rotation.
+        double ztest = ZERO;
+        for (int k = 0; k < npt; k++) {
+            for (int j = 0; j < nptm; j++) {
+                // Computing MAX
+                ztest = FastMath.max(ztest, FastMath.abs(zMatrix.getEntry(k, j)));
+            }
+        }
+        ztest *= 1e-20;
+
+        // Apply the rotations that put zeros in the KNEW-th row of ZMAT.
+        // Each rotation combines column 0 with column j, so that only column 0
+        // keeps a non-zero entry in row "knew".
+
+        for (int j = 1; j < nptm; j++) {
+            final double d1 = zMatrix.getEntry(knew, j);
+            if (FastMath.abs(d1) > ztest) {
+                // Computing 2nd power
+                final double d2 = zMatrix.getEntry(knew, 0);
+                // Computing 2nd power
+                final double d3 = zMatrix.getEntry(knew, j);
+                // d4 is the length of (d2, d3); d5 and d6 are the rotation coefficients.
+                final double d4 = FastMath.sqrt(d2 * d2 + d3 * d3);
+                final double d5 = zMatrix.getEntry(knew, 0) / d4;
+                final double d6 = zMatrix.getEntry(knew, j) / d4;
+                for (int i = 0; i < npt; i++) {
+                    // d7 is kept aside because column 0 is still read on the next line.
+                    final double d7 = d5 * zMatrix.getEntry(i, 0) + d6 * zMatrix.getEntry(i, j);
+                    zMatrix.setEntry(i, j, d5 * zMatrix.getEntry(i, j) - d6 * zMatrix.getEntry(i, 0));
+                    zMatrix.setEntry(i, 0, d7);
+                }
+            }
+            zMatrix.setEntry(knew, j, ZERO);
+        }
+
+        // Put the first NPT components of the KNEW-th column of HLAG into W,
+        // and calculate the parameters of the updating formula.
+
+        for (int i = 0; i < npt; i++) {
+            work.setEntry(i, zMatrix.getEntry(knew, 0) * zMatrix.getEntry(i, 0));
+        }
+        final double alpha = work.getEntry(knew);
+        final double tau = lagrangeValuesAtNewPoint.getEntry(knew);
+        // tau keeps the original value; the stored entry is decremented by one.
+        lagrangeValuesAtNewPoint.setEntry(knew, lagrangeValuesAtNewPoint.getEntry(knew) - ONE);
+
+        // Complete the updating of ZMAT.
+
+        final double sqrtDenom = FastMath.sqrt(denom);
+        final double d1 = tau / sqrtDenom;
+        final double d2 = zMatrix.getEntry(knew, 0) / sqrtDenom;
+        for (int i = 0; i < npt; i++) {
+            zMatrix.setEntry(i, 0,
+                          d1 * zMatrix.getEntry(i, 0) - d2 * lagrangeValuesAtNewPoint.getEntry(i));
+        }
+
+        // Finally, update the matrix BMAT.
+
+        for (int j = 0; j < n; j++) {
+            final int jp = npt + j;
+            work.setEntry(jp, bMatrix.getEntry(knew, j));
+            final double d3 = (alpha * lagrangeValuesAtNewPoint.getEntry(jp) - tau * work.getEntry(jp)) / denom;
+            final double d4 = (-beta * work.getEntry(jp) - tau * lagrangeValuesAtNewPoint.getEntry(jp)) / denom;
+            for (int i = 0; i <= jp; i++) {
+                bMatrix.setEntry(i, j,
+                              bMatrix.getEntry(i, j) + d3 * lagrangeValuesAtNewPoint.getEntry(i) + d4 * work.getEntry(i));
+                // Rows npt and beyond: copy the new value to the transposed position.
+                if (i >= npt) {
+                    bMatrix.setEntry(jp, (i - npt), bMatrix.getEntry(i, j));
+                }
+            }
+        }
+    } // update
+
+    /**
+     * Performs validity checks and allocates the working storage.
+     * <p>
+     * Checks the problem dimension and the number of interpolation points,
+     * computes the per-coordinate bound differences (reducing the initial
+     * trust region radius to a third of the smallest difference when that
+     * difference is less than twice the radius), and creates the matrices
+     * and vectors used by the "bobyqa" method.
+     *
+     * @param lowerBound Lower bounds (constraints) of the objective variables.
+     * @param upperBound Upper bounds (constraints) of the objective variables.
+     * @throws NumberIsTooSmallException if the dimension is smaller than
+     * {@code MINIMUM_PROBLEM_DIMENSION}.
+     * @throws OutOfRangeException if the number of interpolation points is
+     * outside {@code [n + 2, (n + 1)(n + 2) / 2]}, {@code n} being the dimension.
+     */
+    private void setup(double[] lowerBound,
+                       double[] upperBound) {
+        printMethod(); // XXX
+
+        double[] init = getStartPoint();
+        final int dimension = init.length;
+
+        // Check problem dimension.
+        if (dimension < MINIMUM_PROBLEM_DIMENSION) {
+            throw new NumberIsTooSmallException(dimension, MINIMUM_PROBLEM_DIMENSION, true);
+        }
+        // Check number of interpolation points.
+        final int[] nPointsInterval = { dimension + 2, (dimension + 2) * (dimension + 1) / 2 };
+        if (numberOfInterpolationPoints < nPointsInterval[0] ||
+            numberOfInterpolationPoints > nPointsInterval[1]) {
+            throw new OutOfRangeException(LocalizedFormats.NUMBER_OF_INTERPOLATION_POINTS,
+                                          numberOfInterpolationPoints,
+                                          nPointsInterval[0],
+                                          nPointsInterval[1]);
+        }
+
+        // Initialize bound differences.
+        boundDifference = new double[dimension];
+
+        double requiredMinDiff = 2 * initialTrustRegionRadius;
+        double minDiff = Double.POSITIVE_INFINITY;
+        for (int i = 0; i < dimension; i++) {
+            boundDifference[i] = upperBound[i] - lowerBound[i];
+            minDiff = FastMath.min(minDiff, boundDifference[i]);
+        }
+        // The radius is silently reduced here; no exception is raised.
+        if (minDiff < requiredMinDiff) {
+            initialTrustRegionRadius = minDiff / 3.0;
+        }
+
+        // Initialize the data structures used by the "bobyqa" method.
+        bMatrix = new Array2DRowRealMatrix(dimension + numberOfInterpolationPoints,
+                                           dimension);
+        zMatrix = new Array2DRowRealMatrix(numberOfInterpolationPoints,
+                                           numberOfInterpolationPoints - dimension - 1);
+        interpolationPoints = new Array2DRowRealMatrix(numberOfInterpolationPoints,
+                                                       dimension);
+        originShift = new ArrayRealVector(dimension);
+        fAtInterpolationPoints = new ArrayRealVector(numberOfInterpolationPoints);
+        trustRegionCenterOffset = new ArrayRealVector(dimension);
+        gradientAtTrustRegionCenter = new ArrayRealVector(dimension);
+        lowerDifference = new ArrayRealVector(dimension);
+        upperDifference = new ArrayRealVector(dimension);
+        modelSecondDerivativesParameters = new ArrayRealVector(numberOfInterpolationPoints);
+        newPoint = new ArrayRealVector(dimension);
+        alternativeNewPoint = new ArrayRealVector(dimension);
+        trialStepPoint = new ArrayRealVector(dimension);
+        lagrangeValuesAtNewPoint = new ArrayRealVector(dimension + numberOfInterpolationPoints);
+        modelSecondDerivativesValues = new ArrayRealVector(dimension * (dimension + 1) / 2);
+    }
+
+    // XXX utility for figuring out call sequence.
+    // Returns "<method name> (at line <line number>)" for element "n" of the stack
+    // trace of a Throwable created here. No bounds check is made on "n", so a
+    // value beyond the stack depth raises ArrayIndexOutOfBoundsException.
+    private static String caller(int n) {
+        final Throwable t = new Throwable();
+        final StackTraceElement[] elements = t.getStackTrace();
+        final StackTraceElement e = elements[n];
+        return e.getMethodName() + " (at line " + e.getLineNumber() + ")";
+    }
+    // XXX utility for figuring out call sequence.
+    private static void printState(int s) {
+        // Tracing is disabled: the only statement is commented out, so the
+        // argument is unused and the method does nothing.
+        //        System.out.println(caller(2) + ": state " + s);
+    }
+    // XXX utility for figuring out call sequence.
+    private static void printMethod() {
+        // Tracing is disabled: the only statement is commented out.
+        //        System.out.println(caller(2));
+    }
+
+    /**
+     * Marker for code paths that are not explored with the current unit tests.
+     * If the path becomes explored, it should just be removed from the code.
+     * <p>
+     * The exception message is the fixed text below followed by the
+     * description that {@code caller(3)} builds (method name and line number
+     * of stack trace element 3).
+     */
+    private static class PathIsExploredException extends RuntimeException {
+        /** Serializable UID. */
+        private static final long serialVersionUID = 745350979634801853L;
+
+        /** Message string. */
+        private static final String PATH_IS_EXPLORED
+            = "If this exception is thrown, just remove it from the code";
+
+        /** Builds the exception with the fixed message and the caller description. */
+        PathIsExploredException() {
+            super(PATH_IS_EXPLORED + " " + BOBYQAOptimizer.caller(3));
+        }
+    }
+}
+//CHECKSTYLE: resume all
diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/CMAESOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/CMAESOptimizer.java
new file mode 100644
index 0000000..13566be
--- /dev/null
+++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/CMAESOptimizer.java
@@ -0,0 +1,1354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.NotPositiveException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.linear.Array2DRowRealMatrix; +import org.apache.commons.math3.linear.EigenDecomposition; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.nonlinear.scalar.MultivariateOptimizer; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.MathArrays; + +/** + * An implementation of the active Covariance Matrix Adaptation Evolution Strategy (CMA-ES) + * for non-linear, non-convex, non-smooth, global function minimization. + * <p> + * The CMA-Evolution Strategy (CMA-ES) is a reliable stochastic optimization method + * which should be applied if derivative-based methods, e.g. quasi-Newton BFGS or + * conjugate gradient, fail due to a rugged search landscape (e.g. noise, local + * optima, outlier, etc.) of the objective function. Like a + * quasi-Newton method, the CMA-ES learns and applies a variable metric + * on the underlying search space. 
Unlike a quasi-Newton method, the + * CMA-ES neither estimates nor uses gradients, making it considerably more + * reliable in terms of finding a good, or even close to optimal, solution. + * <p> + * In general, on smooth objective functions the CMA-ES is roughly ten times + * slower than BFGS (counting objective function evaluations, no gradients provided). + * For up to <math>N=10</math> variables also the derivative-free simplex + * direct search method (Nelder and Mead) can be faster, but it is + * far less reliable than CMA-ES. + * <p> + * The CMA-ES is particularly well suited for non-separable + * and/or badly conditioned problems. To observe the advantage of CMA compared + * to a conventional evolution strategy, it will usually take about + * <math>30 N</math> function evaluations. On difficult problems the complete + * optimization (a single run) is expected to take <em>roughly</em> between + * <math>30 N</math> and <math>300 N<sup>2</sup></math> + * function evaluations. + * <p> + * This implementation is translated and adapted from the Matlab version + * of the CMA-ES algorithm as implemented in module {@code cmaes.m} version 3.51. + * <p> + * For more information, please refer to the following links: + * <ul> + * <li><a href="http://www.lri.fr/~hansen/cmaes.m">Matlab code</a></li> + * <li><a href="http://www.lri.fr/~hansen/cmaesintro.html">Introduction to CMA-ES</a></li> + * <li><a href="http://en.wikipedia.org/wiki/CMA-ES">Wikipedia</a></li> + * </ul> + * + * @since 3.0 + */ +public class CMAESOptimizer + extends MultivariateOptimizer { + // global search parameters + /** + * Population size, offspring number. The primary strategy parameter to play + * with, which can be increased from its default value. Increasing the + * population size improves global search properties in exchange to speed. + * Speed decreases, as a rule, at most linearly with increasing population + * size. It is advisable to begin with the default small population size. 
+ */ + private int lambda; // population size + /** + * Covariance update mechanism, default is active CMA. isActiveCMA = true + * turns on "active CMA" with a negative update of the covariance matrix and + * checks for positive definiteness. OPTS.CMA.active = 2 does not check for + * pos. def. and is numerically faster. Active CMA usually speeds up the + * adaptation. + */ + private final boolean isActiveCMA; + /** + * Determines how often a new random offspring is generated in case it is + * not feasible / beyond the defined limits, default is 0. + */ + private final int checkFeasableCount; + /** + * @see Sigma + */ + private double[] inputSigma; + /** Number of objective variables/problem dimension */ + private int dimension; + /** + * Defines the number of initial iterations, where the covariance matrix + * remains diagonal and the algorithm has internally linear time complexity. + * diagonalOnly = 1 means keeping the covariance matrix always diagonal and + * this setting also exhibits linear space complexity. This can be + * particularly useful for dimension > 100. + * @see <a href="http://hal.archives-ouvertes.fr/inria-00287367/en">A Simple Modification in CMA-ES</a> + */ + private int diagonalOnly; + /** Indicates whether the goal is to minimize (true) or maximize (false) the objective function. */ + private boolean isMinimize = true; + /** Indicates whether statistic data is collected. */ + private final boolean generateStatistics; + + // termination criteria + /** Maximal number of iterations allowed. */ + private final int maxIterations; + /** Limit for fitness value. */ + private final double stopFitness; + /** Stop if x-changes larger stopTolUpX. */ + private double stopTolUpX; + /** Stop if x-change smaller stopTolX. */ + private double stopTolX; + /** Stop if fun-changes smaller stopTolFun. */ + private double stopTolFun; + /** Stop if back fun-changes smaller stopTolHistFun.
*/ + private double stopTolHistFun; + + // selection strategy parameters + /** Number of parents/points for recombination. */ + private int mu; // + /** log(mu + 0.5), stored for efficiency. */ + private double logMu2; + /** Array for weighted recombination. */ + private RealMatrix weights; + /** Variance-effectiveness of sum w_i x_i. */ + private double mueff; // + + // dynamic strategy parameters and constants + /** Overall standard deviation - search volume. */ + private double sigma; + /** Cumulation constant. */ + private double cc; + /** Cumulation constant for step-size. */ + private double cs; + /** Damping for step-size. */ + private double damps; + /** Learning rate for rank-one update. */ + private double ccov1; + /** Learning rate for rank-mu update. */ + private double ccovmu; + /** Expectation of ||N(0,I)|| == norm(randn(N,1)). */ + private double chiN; + /** Learning rate for rank-one update - diagonalOnly */ + private double ccov1Sep; + /** Learning rate for rank-mu update - diagonalOnly */ + private double ccovmuSep; + + // CMA internal values - updated each generation + /** Objective variables. */ + private RealMatrix xmean; + /** Evolution path. */ + private RealMatrix pc; + /** Evolution path for sigma. */ + private RealMatrix ps; + /** Norm of ps, stored for efficiency. */ + private double normps; + /** Coordinate system. */ + private RealMatrix B; + /** Scaling. */ + private RealMatrix D; + /** B*D, stored for efficiency. */ + private RealMatrix BD; + /** Diagonal of sqrt(D), stored for efficiency. */ + private RealMatrix diagD; + /** Covariance matrix. */ + private RealMatrix C; + /** Diagonal of C, used for diagonalOnly. */ + private RealMatrix diagC; + /** Number of iterations already performed. */ + private int iterations; + + /** History queue of best values. */ + private double[] fitnessHistory; + /** Size of history queue of best values. */ + private int historySize; + + /** Random generator.
*/
+    private final RandomGenerator random;
+
+    /** History of sigma values. */
+    private final List<Double> statisticsSigmaHistory = new ArrayList<Double>();
+    /** History of mean matrix. */
+    private final List<RealMatrix> statisticsMeanHistory = new ArrayList<RealMatrix>();
+    /** History of fitness values. */
+    private final List<Double> statisticsFitnessHistory = new ArrayList<Double>();
+    /** History of D matrix. */
+    private final List<RealMatrix> statisticsDHistory = new ArrayList<RealMatrix>();
+
+    /**
+     * Stores the settings that stay fixed for the lifetime of the optimizer.
+     * The population size and the input sigma values are not set here: they
+     * are read from the optimization data in {@code parseOptimizationData}.
+     *
+     * @param maxIterations Maximal number of iterations.
+     * @param stopFitness Whether to stop if objective function value is smaller than
+     * {@code stopFitness}. The value {@code 0} disables this criterion (the
+     * generation loop only tests it when {@code stopFitness != 0}).
+     * @param isActiveCMA Chooses the covariance matrix update method.
+     * @param diagonalOnly Number of initial iterations, where the covariance matrix
+     * remains diagonal.
+     * @param checkFeasableCount Determines how often new random objective variables are
+     * generated in case they are out of bounds.
+     * @param random Random generator.
+     * @param generateStatistics Whether statistic data is collected.
+     * @param checker Convergence checker.
+     *
+     * @since 3.1
+     */
+    public CMAESOptimizer(int maxIterations,
+                          double stopFitness,
+                          boolean isActiveCMA,
+                          int diagonalOnly,
+                          int checkFeasableCount,
+                          RandomGenerator random,
+                          boolean generateStatistics,
+                          ConvergenceChecker<PointValuePair> checker) {
+        super(checker);
+        this.maxIterations = maxIterations;
+        this.stopFitness = stopFitness;
+        this.isActiveCMA = isActiveCMA;
+        this.diagonalOnly = diagonalOnly;
+        this.checkFeasableCount = checkFeasableCount;
+        this.random = random;
+        this.generateStatistics = generateStatistics;
+    }
+
+    /**
+     * The list returned is the internal one, not a copy. Values are appended
+     * once per generation, and only when statistics generation was requested
+     * at construction.
+     *
+     * @return History of sigma values.
+     */
+    public List<Double> getStatisticsSigmaHistory() {
+        return statisticsSigmaHistory;
+    }
+
+    /**
+     * The list returned is the internal one, not a copy. Each element is the
+     * transpose of the mean column matrix of one generation.
+     *
+     * @return History of mean matrix.
+     */
+    public List<RealMatrix> getStatisticsMeanHistory() {
+        return statisticsMeanHistory;
+    }
+
+    /**
+     * @return History of fitness values.
+     */
+    public List<Double> getStatisticsFitnessHistory() {
+        return statisticsFitnessHistory;
+    }
+
+    /**
+     * The list returned is the internal one, not a copy. Each element is the
+     * transposed {@code diagD} matrix of one generation multiplied by 1E5.
+     *
+     * @return History of D matrix.
+     */
+    public List<RealMatrix> getStatisticsDHistory() {
+        return statisticsDHistory;
+    }
+
+    /**
+     * Input sigma values.
+     * They define the initial coordinate-wise standard deviations for
+     * sampling new search points around the initial guess.
+     * It is suggested to set them to the estimated distance from the
+     * initial to the desired optimum.
+     * Small values induce the search to be more local (and very small
+     * values are more likely to find a local optimum close to the initial
+     * guess).
+     * Too small values might however lead to early termination.
+     */
+    public static class Sigma implements OptimizationData {
+        /** Sigma values. */
+        private final double[] sigma;
+
+        /**
+         * The array is copied, so later changes made by the caller do not
+         * affect this instance. Zero entries are accepted.
+         *
+         * @param s Sigma values.
+         * @throws NotPositiveException if any of the array entries is smaller
+         * than zero.
+         */
+        public Sigma(double[] s)
+            throws NotPositiveException {
+            for (int i = 0; i < s.length; i++) {
+                if (s[i] < 0) {
+                    throw new NotPositiveException(s[i]);
+                }
+            }
+
+            sigma = s.clone();
+        }
+
+        /**
+         * @return the sigma values (a copy of the internal array).
+         */
+        public double[] getSigma() {
+            return sigma.clone();
+        }
+    }
+
+    /**
+     * Population size.
+     * The number of offspring is the primary strategy parameter.
+     * In the absence of better clues, a good default could be an
+     * integer close to {@code 4 + 3 ln(n)}, where {@code n} is the
+     * number of optimized parameters.
+     * Increasing the population size improves global search properties
+     * at the expense of speed (which in general decreases at most
+     * linearly with increasing population size).
+     */
+    public static class PopulationSize implements OptimizationData {
+        /** Population size. */
+        private final int lambda;
+
+        /**
+         * @param size Population size.
+         * @throws NotStrictlyPositiveException if {@code size <= 0}.
+         */
+        public PopulationSize(int size)
+            throws NotStrictlyPositiveException {
+            if (size <= 0) {
+                throw new NotStrictlyPositiveException(size);
+            }
+            lambda = size;
+        }
+
+        /**
+         * @return the population size.
+         */
+        public int getPopulationSize() {
+            return lambda;
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @param optData Optimization data. In addition to those documented in
+     * {@link MultivariateOptimizer#parseOptimizationData(OptimizationData[])
+     * MultivariateOptimizer}, this method will register the following data:
+     * <ul>
+     *  <li>{@link Sigma}</li>
+     *  <li>{@link PopulationSize}</li>
+     * </ul>
+     * @return {@inheritDoc}
+     * @throws TooManyEvaluationsException if the maximal number of
+     * evaluations is exceeded.
+     * @throws DimensionMismatchException if the initial guess, target, and weight
+     * arguments have inconsistent dimensions.
+     */
+    @Override
+    public PointValuePair optimize(OptimizationData... optData)
+        throws TooManyEvaluationsException,
+               DimensionMismatchException {
+        // Set up base class and perform computation.
+        return super.optimize(optData);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    protected PointValuePair doOptimize() {
+         // -------------------- Initialization --------------------------------
+        isMinimize = getGoalType().equals(GoalType.MINIMIZE);
+        final FitnessFunction fitfun = new FitnessFunction();
+        final double[] guess = getStartPoint();
+        // number of objective variables/problem dimension
+        dimension = guess.length;
+        initializeCMA(guess);
+        iterations = 0;
+        // The start point provides the initial "best" value and optimum.
+        ValuePenaltyPair valuePenalty = fitfun.value(guess);
+        double bestValue = valuePenalty.value+valuePenalty.penalty;
+        push(fitnessHistory, bestValue);
+        PointValuePair optimum
+            = new PointValuePair(getStartPoint(),
+                                 isMinimize ? bestValue : -bestValue);
+        PointValuePair lastResult = null;
+
+        // -------------------- Generation Loop --------------------------------
+
+        generationLoop:
+        for (iterations = 1; iterations <= maxIterations; iterations++) {
+            incrementIterationCount();
+
+            // Generate and evaluate lambda offspring
+            final RealMatrix arz = randn1(dimension, lambda);
+            final RealMatrix arx = zeros(dimension, lambda);
+            final double[] fitness = new double[lambda];
+            final ValuePenaltyPair[] valuePenaltyPairs = new ValuePenaltyPair[lambda];
+            // generate random offspring
+            for (int k = 0; k < lambda; k++) {
+                RealMatrix arxk = null;
+                // At most checkFeasableCount re-draws; the last candidate is
+                // kept even when it is not feasible.
+                for (int i = 0; i < checkFeasableCount + 1; i++) {
+                    if (diagonalOnly <= 0) {
+                        arxk = xmean.add(BD.multiply(arz.getColumnMatrix(k))
+                                         .scalarMultiply(sigma)); // m + sig * Normal(0,C)
+                    } else {
+                        arxk = xmean.add(times(diagD,arz.getColumnMatrix(k))
+                                         .scalarMultiply(sigma));
+                    }
+                    if (i >= checkFeasableCount ||
+                        fitfun.isFeasible(arxk.getColumn(0))) {
+                        break;
+                    }
+                    // regenerate random arguments for row
+                    arz.setColumn(k, randn(dimension));
+                }
+                copyColumn(arxk, 0, arx, k);
+                try {
+                    valuePenaltyPairs[k] = fitfun.value(arx.getColumn(k)); // compute fitness
+                } catch (TooManyEvaluationsException e) {
+                    // Evaluation budget exhausted: stop and return the best
+                    // point found so far instead of propagating the exception.
+                    break generationLoop;
+                }
+            }
+
+            // Compute fitnesses by adding value and penalty after scaling by value range.
+            double valueRange = valueRange(valuePenaltyPairs);
+            for (int iValue=0;iValue<valuePenaltyPairs.length;iValue++) {
+                 fitness[iValue] = valuePenaltyPairs[iValue].value + valuePenaltyPairs[iValue].penalty*valueRange;
+            }
+
+            // Sort by fitness and compute weighted mean into xmean
+            final int[] arindex = sortedIndices(fitness);
+            // Calculate new xmean, this is selection and recombination
+            final RealMatrix xold = xmean; // for speed up of Eq. (2) and (3)
+            final RealMatrix bestArx = selectColumns(arx, MathArrays.copyOf(arindex, mu));
+            xmean = bestArx.multiply(weights);
+            final RealMatrix bestArz = selectColumns(arz, MathArrays.copyOf(arindex, mu));
+            final RealMatrix zmean = bestArz.multiply(weights);
+            final boolean hsig = updateEvolutionPaths(zmean, xold);
+            if (diagonalOnly <= 0) {
+                updateCovariance(hsig, bestArx, arz, arindex, xold);
+            } else {
+                updateCovarianceDiagonalOnly(hsig, bestArz);
+            }
+            // Adapt step size sigma - Eq. (5)
+            sigma *= FastMath.exp(FastMath.min(1, (normps/chiN - 1) * cs / damps));
+            final double bestFitness = fitness[arindex[0]];
+            final double worstFitness = fitness[arindex[arindex.length - 1]];
+            if (bestValue > bestFitness) {
+                bestValue = bestFitness;
+                lastResult = optimum;
+                optimum = new PointValuePair(fitfun.repair(bestArx.getColumn(0)),
+                                             isMinimize ? bestFitness : -bestFitness);
+                // NOTE(review): the checker interface declares
+                // converged(iteration, previous, current), but the newest point
+                // ("optimum") is passed in the "previous" slot here. This makes
+                // no difference for a checker that treats both points
+                // symmetrically; confirm before relying on an asymmetric one.
+                if (getConvergenceChecker() != null && lastResult != null &&
+                    getConvergenceChecker().converged(iterations, optimum, lastResult)) {
+                    break generationLoop;
+                }
+            }
+            // handle termination criteria
+            // Break, if fitness is good enough
+            if (stopFitness != 0 && bestFitness < (isMinimize ? stopFitness : -stopFitness)) {
+                break generationLoop;
+            }
+            final double[] sqrtDiagC = sqrt(diagC).getColumn(0);
+            final double[] pcCol = pc.getColumn(0);
+            // Stop only if every coordinate is within stopTolX: the inner
+            // "break" leaves this check as soon as one coordinate is not.
+            for (int i = 0; i < dimension; i++) {
+                if (sigma * FastMath.max(FastMath.abs(pcCol[i]), sqrtDiagC[i]) > stopTolX) {
+                    break;
+                }
+                if (i >= dimension - 1) {
+                    break generationLoop;
+                }
+            }
+            // Stop as soon as one coordinate exceeds stopTolUpX.
+            for (int i = 0; i < dimension; i++) {
+                if (sigma * sqrtDiagC[i] > stopTolUpX) {
+                    break generationLoop;
+                }
+            }
+            final double historyBest = min(fitnessHistory);
+            final double historyWorst = max(fitnessHistory);
+            if (iterations > 2 &&
+                FastMath.max(historyWorst, worstFitness) -
+                FastMath.min(historyBest, bestFitness) < stopTolFun) {
+                break generationLoop;
+            }
+            if (iterations > fitnessHistory.length &&
+                historyWorst - historyBest < stopTolHistFun) {
+                break generationLoop;
+            }
+            // condition number of the covariance matrix exceeds 1e14
+            // (the test is on the ratio of the extreme diagD entries, hence 1e7;
+            // presumably diagD holds square-root scales -- confirm)
+            if (max(diagD) / min(diagD) > 1e7) {
+                break generationLoop;
+            }
+            // user defined termination
+            if (getConvergenceChecker() != null) {
+                final PointValuePair current
+                    = new PointValuePair(bestArx.getColumn(0),
+                                         isMinimize ? bestFitness : -bestFitness);
+                // NOTE(review): same argument order as above, "current" is
+                // passed in the "previous" slot of converged(...).
+                if (lastResult != null &&
+                    getConvergenceChecker().converged(iterations, current, lastResult)) {
+                    break generationLoop;
+                    }
+                lastResult = current;
+            }
+            // Adjust step size in case of equal function values (flat fitness)
+            if (bestValue == fitness[arindex[(int)(0.1+lambda/4.)]]) {
+                sigma *= FastMath.exp(0.2 + cs / damps);
+            }
+            if (iterations > 2 && FastMath.max(historyWorst, bestFitness) -
+                FastMath.min(historyBest, bestFitness) == 0) {
+                sigma *= FastMath.exp(0.2 + cs / damps);
+            }
+            // store best in history
+            push(fitnessHistory,bestFitness);
+            if (generateStatistics) {
+                statisticsSigmaHistory.add(sigma);
+                statisticsFitnessHistory.add(bestFitness);
+                statisticsMeanHistory.add(xmean.transpose());
+                statisticsDHistory.add(diagD.transpose().scalarMultiply(1E5));
+            }
+        }
+        return optimum;
+    }
+
+    /**
+     * Scans the list of (required and optional) optimization data that
+     * characterize the problem.
+     * Values from a previous call are kept when the corresponding data is
+     * absent from {@code optData}; the parameter checks run after the scan.
+     *
+     * @param optData Optimization data. The following data will be looked for:
+     * <ul>
+     *  <li>{@link Sigma}</li>
+     *  <li>{@link PopulationSize}</li>
+     * </ul>
+     */
+    @Override
+    protected void parseOptimizationData(OptimizationData... optData) {
+        // Allow base class to register its own data.
+        super.parseOptimizationData(optData);
+
+        // The existing values (as set by the previous call) are reused if
+        // not provided in the argument list.
+        for (OptimizationData data : optData) {
+            if (data instanceof Sigma) {
+                inputSigma = ((Sigma) data).getSigma();
+                continue;
+            }
+            if (data instanceof PopulationSize) {
+                lambda = ((PopulationSize) data).getPopulationSize();
+                continue;
+            }
+        }
+
+        checkParameters();
+    }
+
+    /**
+     * Checks dimensions and values of boundaries and inputSigma if defined.
+ */ + private void checkParameters() { + final double[] init = getStartPoint(); + final double[] lB = getLowerBound(); + final double[] uB = getUpperBound(); + + if (inputSigma != null) { + if (inputSigma.length != init.length) { + throw new DimensionMismatchException(inputSigma.length, init.length); + } + for (int i = 0; i < init.length; i++) { + if (inputSigma[i] > uB[i] - lB[i]) { + throw new OutOfRangeException(inputSigma[i], 0, uB[i] - lB[i]); + } + } + } + } + + /** + * Initialization of the dynamic search parameters + * + * @param guess Initial guess for the arguments of the fitness function. + */ + private void initializeCMA(double[] guess) { + if (lambda <= 0) { + throw new NotStrictlyPositiveException(lambda); + } + // initialize sigma + final double[][] sigmaArray = new double[guess.length][1]; + for (int i = 0; i < guess.length; i++) { + sigmaArray[i][0] = inputSigma[i]; + } + final RealMatrix insigma = new Array2DRowRealMatrix(sigmaArray, false); + sigma = max(insigma); // overall standard deviation + + // initialize termination criteria + stopTolUpX = 1e3 * max(insigma); + stopTolX = 1e-11 * max(insigma); + stopTolFun = 1e-12; + stopTolHistFun = 1e-13; + + // initialize selection strategy parameters + mu = lambda / 2; // number of parents/points for recombination + logMu2 = FastMath.log(mu + 0.5); + weights = log(sequence(1, mu, 1)).scalarMultiply(-1).scalarAdd(logMu2); + double sumw = 0; + double sumwq = 0; + for (int i = 0; i < mu; i++) { + double w = weights.getEntry(i, 0); + sumw += w; + sumwq += w * w; + } + weights = weights.scalarMultiply(1 / sumw); + mueff = sumw * sumw / sumwq; // variance-effectiveness of sum w_i x_i + + // initialize dynamic strategy parameters and constants + cc = (4 + mueff / dimension) / + (dimension + 4 + 2 * mueff / dimension); + cs = (mueff + 2) / (dimension + mueff + 3.); + damps = (1 + 2 * FastMath.max(0, FastMath.sqrt((mueff - 1) / + (dimension + 1)) - 1)) * + FastMath.max(0.3, + 1 - dimension / (1e-6 + 
maxIterations)) + cs; // minor increment + ccov1 = 2 / ((dimension + 1.3) * (dimension + 1.3) + mueff); + ccovmu = FastMath.min(1 - ccov1, 2 * (mueff - 2 + 1 / mueff) / + ((dimension + 2) * (dimension + 2) + mueff)); + ccov1Sep = FastMath.min(1, ccov1 * (dimension + 1.5) / 3); + ccovmuSep = FastMath.min(1 - ccov1, ccovmu * (dimension + 1.5) / 3); + chiN = FastMath.sqrt(dimension) * + (1 - 1 / ((double) 4 * dimension) + 1 / ((double) 21 * dimension * dimension)); + // intialize CMA internal values - updated each generation + xmean = MatrixUtils.createColumnRealMatrix(guess); // objective variables + diagD = insigma.scalarMultiply(1 / sigma); + diagC = square(diagD); + pc = zeros(dimension, 1); // evolution paths for C and sigma + ps = zeros(dimension, 1); // B defines the coordinate system + normps = ps.getFrobeniusNorm(); + + B = eye(dimension, dimension); + D = ones(dimension, 1); // diagonal D defines the scaling + BD = times(B, repmat(diagD.transpose(), dimension, 1)); + C = B.multiply(diag(square(D)).multiply(B.transpose())); // covariance + historySize = 10 + (int) (3 * 10 * dimension / (double) lambda); + fitnessHistory = new double[historySize]; // history of fitness values + for (int i = 0; i < historySize; i++) { + fitnessHistory[i] = Double.MAX_VALUE; + } + } + + /** + * Update of the evolution paths ps and pc. + * + * @param zmean Weighted row matrix of the gaussian random numbers generating + * the current offspring. + * @param xold xmean matrix of the previous generation. + * @return hsig flag indicating a small correction. 
+ */ + private boolean updateEvolutionPaths(RealMatrix zmean, RealMatrix xold) { + ps = ps.scalarMultiply(1 - cs).add( + B.multiply(zmean).scalarMultiply( + FastMath.sqrt(cs * (2 - cs) * mueff))); + normps = ps.getFrobeniusNorm(); + final boolean hsig = normps / + FastMath.sqrt(1 - FastMath.pow(1 - cs, 2 * iterations)) / + chiN < 1.4 + 2 / ((double) dimension + 1); + pc = pc.scalarMultiply(1 - cc); + if (hsig) { + pc = pc.add(xmean.subtract(xold).scalarMultiply(FastMath.sqrt(cc * (2 - cc) * mueff) / sigma)); + } + return hsig; + } + + /** + * Update of the covariance matrix C for diagonalOnly > 0 + * + * @param hsig Flag indicating a small correction. + * @param bestArz Fitness-sorted matrix of the gaussian random values of the + * current offspring. + */ + private void updateCovarianceDiagonalOnly(boolean hsig, + final RealMatrix bestArz) { + // minor correction if hsig==false + double oldFac = hsig ? 0 : ccov1Sep * cc * (2 - cc); + oldFac += 1 - ccov1Sep - ccovmuSep; + diagC = diagC.scalarMultiply(oldFac) // regard old matrix + .add(square(pc).scalarMultiply(ccov1Sep)) // plus rank one update + .add((times(diagC, square(bestArz).multiply(weights))) // plus rank mu update + .scalarMultiply(ccovmuSep)); + diagD = sqrt(diagC); // replaces eig(C) + if (diagonalOnly > 1 && + iterations > diagonalOnly) { + // full covariance matrix from now on + diagonalOnly = 0; + B = eye(dimension, dimension); + BD = diag(diagD); + C = diag(diagC); + } + } + + /** + * Update of the covariance matrix C. + * + * @param hsig Flag indicating a small correction. + * @param bestArx Fitness-sorted matrix of the argument vectors producing the + * current offspring. + * @param arz Unsorted matrix containing the gaussian random values of the + * current offspring. + * @param arindex Indices indicating the fitness-order of the current offspring. + * @param xold xmean matrix of the previous generation. 
+ */ + private void updateCovariance(boolean hsig, final RealMatrix bestArx, + final RealMatrix arz, final int[] arindex, + final RealMatrix xold) { + double negccov = 0; + if (ccov1 + ccovmu > 0) { + final RealMatrix arpos = bestArx.subtract(repmat(xold, 1, mu)) + .scalarMultiply(1 / sigma); // mu difference vectors + final RealMatrix roneu = pc.multiply(pc.transpose()) + .scalarMultiply(ccov1); // rank one update + // minor correction if hsig==false + double oldFac = hsig ? 0 : ccov1 * cc * (2 - cc); + oldFac += 1 - ccov1 - ccovmu; + if (isActiveCMA) { + // Adapt covariance matrix C active CMA + negccov = (1 - ccovmu) * 0.25 * mueff / + (FastMath.pow(dimension + 2, 1.5) + 2 * mueff); + // keep at least 0.66 in all directions, small popsize are most + // critical + final double negminresidualvariance = 0.66; + // where to make up for the variance loss + final double negalphaold = 0.5; + // prepare vectors, compute negative updating matrix Cneg + final int[] arReverseIndex = reverse(arindex); + RealMatrix arzneg = selectColumns(arz, MathArrays.copyOf(arReverseIndex, mu)); + RealMatrix arnorms = sqrt(sumRows(square(arzneg))); + final int[] idxnorms = sortedIndices(arnorms.getRow(0)); + final RealMatrix arnormsSorted = selectColumns(arnorms, idxnorms); + final int[] idxReverse = reverse(idxnorms); + final RealMatrix arnormsReverse = selectColumns(arnorms, idxReverse); + arnorms = divide(arnormsReverse, arnormsSorted); + final int[] idxInv = inverse(idxnorms); + final RealMatrix arnormsInv = selectColumns(arnorms, idxInv); + // check and set learning rate negccov + final double negcovMax = (1 - negminresidualvariance) / + square(arnormsInv).multiply(weights).getEntry(0, 0); + if (negccov > negcovMax) { + negccov = negcovMax; + } + arzneg = times(arzneg, repmat(arnormsInv, dimension, 1)); + final RealMatrix artmp = BD.multiply(arzneg); + final RealMatrix Cneg = artmp.multiply(diag(weights)).multiply(artmp.transpose()); + oldFac += negalphaold * negccov; + C = 
C.scalarMultiply(oldFac) + .add(roneu) // regard old matrix + .add(arpos.scalarMultiply( // plus rank one update + ccovmu + (1 - negalphaold) * negccov) // plus rank mu update + .multiply(times(repmat(weights, 1, dimension), + arpos.transpose()))) + .subtract(Cneg.scalarMultiply(negccov)); + } else { + // Adapt covariance matrix C - nonactive + C = C.scalarMultiply(oldFac) // regard old matrix + .add(roneu) // plus rank one update + .add(arpos.scalarMultiply(ccovmu) // plus rank mu update + .multiply(times(repmat(weights, 1, dimension), + arpos.transpose()))); + } + } + updateBD(negccov); + } + + /** + * Update B and D from C. + * + * @param negccov Negative covariance factor. + */ + private void updateBD(double negccov) { + if (ccov1 + ccovmu + negccov > 0 && + (iterations % 1. / (ccov1 + ccovmu + negccov) / dimension / 10.) < 1) { + // to achieve O(N^2) + C = triu(C, 0).add(triu(C, 1).transpose()); + // enforce symmetry to prevent complex numbers + final EigenDecomposition eig = new EigenDecomposition(C); + B = eig.getV(); // eigen decomposition, B==normalized eigenvectors + D = eig.getD(); + diagD = diag(D); + if (min(diagD) <= 0) { + for (int i = 0; i < dimension; i++) { + if (diagD.getEntry(i, 0) < 0) { + diagD.setEntry(i, 0, 0); + } + } + final double tfac = max(diagD) / 1e14; + C = C.add(eye(dimension, dimension).scalarMultiply(tfac)); + diagD = diagD.add(ones(dimension, 1).scalarMultiply(tfac)); + } + if (max(diagD) > 1e14 * min(diagD)) { + final double tfac = max(diagD) / 1e14 - min(diagD); + C = C.add(eye(dimension, dimension).scalarMultiply(tfac)); + diagD = diagD.add(ones(dimension, 1).scalarMultiply(tfac)); + } + diagC = diag(C); + diagD = sqrt(diagD); // D contains standard deviations now + BD = times(B, repmat(diagD.transpose(), dimension, 1)); // O(n^2) + } + } + + /** + * Pushes the current best fitness value in a history queue. + * + * @param vals History queue. + * @param val Current best fitness value. 
+ */ + private static void push(double[] vals, double val) { + for (int i = vals.length-1; i > 0; i--) { + vals[i] = vals[i-1]; + } + vals[0] = val; + } + + /** + * Sorts fitness values. + * + * @param doubles Array of values to be sorted. + * @return a sorted array of indices pointing into doubles. + */ + private int[] sortedIndices(final double[] doubles) { + final DoubleIndex[] dis = new DoubleIndex[doubles.length]; + for (int i = 0; i < doubles.length; i++) { + dis[i] = new DoubleIndex(doubles[i], i); + } + Arrays.sort(dis); + final int[] indices = new int[doubles.length]; + for (int i = 0; i < doubles.length; i++) { + indices[i] = dis[i].index; + } + return indices; + } + /** + * Get range of values. + * + * @param vpPairs Array of valuePenaltyPairs to get range from. + * @return a double equal to maximum value minus minimum value. + */ + private double valueRange(final ValuePenaltyPair[] vpPairs) { + double max = Double.NEGATIVE_INFINITY; + double min = Double.MAX_VALUE; + for (ValuePenaltyPair vpPair:vpPairs) { + if (vpPair.value > max) { + max = vpPair.value; + } + if (vpPair.value < min) { + min = vpPair.value; + } + } + return max-min; + } + + /** + * Used to sort fitness values. Sorting is always in lower value first + * order. + */ + private static class DoubleIndex implements Comparable<DoubleIndex> { + /** Value to compare. */ + private final double value; + /** Index into sorted array. */ + private final int index; + + /** + * @param value Value to compare. + * @param index Index into sorted array. 
+ */ + DoubleIndex(double value, int index) { + this.value = value; + this.index = index; + } + + /** {@inheritDoc} */ + public int compareTo(DoubleIndex o) { + return Double.compare(value, o.value); + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other instanceof DoubleIndex) { + return Double.compare(value, ((DoubleIndex) other).value) == 0; + } + + return false; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + long bits = Double.doubleToLongBits(value); + return (int) ((1438542 ^ (bits >>> 32) ^ bits) & 0xffffffff); + } + } + /** + * Stores the value and penalty (for repair of out of bounds point). + */ + private static class ValuePenaltyPair { + /** Objective function value. */ + private double value; + /** Penalty value for repair of out out of bounds points. */ + private double penalty; + + /** + * @param value Function value. + * @param penalty Out-of-bounds penalty. + */ + ValuePenaltyPair(final double value, final double penalty) { + this.value = value; + this.penalty = penalty; + } + } + + + /** + * Normalizes fitness values to the range [0,1]. Adds a penalty to the + * fitness value if out of range. + */ + private class FitnessFunction { + /** + * Flag indicating whether the objective variables are forced into their + * bounds if defined + */ + private final boolean isRepairMode; + + /** Simple constructor. + */ + FitnessFunction() { + isRepairMode = true; + } + + /** + * @param point Normalized objective variables. + * @return the objective value + penalty for violated bounds. + */ + public ValuePenaltyPair value(final double[] point) { + double value; + double penalty=0.0; + if (isRepairMode) { + double[] repaired = repair(point); + value = CMAESOptimizer.this.computeObjectiveValue(repaired); + penalty = penalty(point, repaired); + } else { + value = CMAESOptimizer.this.computeObjectiveValue(point); + } + value = isMinimize ? 
value : -value; + penalty = isMinimize ? penalty : -penalty; + return new ValuePenaltyPair(value,penalty); + } + + /** + * @param x Normalized objective variables. + * @return {@code true} if in bounds. + */ + public boolean isFeasible(final double[] x) { + final double[] lB = CMAESOptimizer.this.getLowerBound(); + final double[] uB = CMAESOptimizer.this.getUpperBound(); + + for (int i = 0; i < x.length; i++) { + if (x[i] < lB[i]) { + return false; + } + if (x[i] > uB[i]) { + return false; + } + } + return true; + } + + /** + * @param x Normalized objective variables. + * @return the repaired (i.e. all in bounds) objective variables. + */ + private double[] repair(final double[] x) { + final double[] lB = CMAESOptimizer.this.getLowerBound(); + final double[] uB = CMAESOptimizer.this.getUpperBound(); + + final double[] repaired = new double[x.length]; + for (int i = 0; i < x.length; i++) { + if (x[i] < lB[i]) { + repaired[i] = lB[i]; + } else if (x[i] > uB[i]) { + repaired[i] = uB[i]; + } else { + repaired[i] = x[i]; + } + } + return repaired; + } + + /** + * @param x Normalized objective variables. + * @param repaired Repaired objective variables. + * @return Penalty value according to the violation of the bounds. + */ + private double penalty(final double[] x, final double[] repaired) { + double penalty = 0; + for (int i = 0; i < x.length; i++) { + double diff = FastMath.abs(x[i] - repaired[i]); + penalty += diff; + } + return isMinimize ? penalty : -penalty; + } + } + + // -----Matrix utility functions similar to the Matlab build in functions------ + + /** + * @param m Input matrix + * @return Matrix representing the element-wise logarithm of m. 
+ */ + private static RealMatrix log(final RealMatrix m) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + d[r][c] = FastMath.log(m.getEntry(r, c)); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @return Matrix representing the element-wise square root of m. + */ + private static RealMatrix sqrt(final RealMatrix m) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + d[r][c] = FastMath.sqrt(m.getEntry(r, c)); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @return Matrix representing the element-wise square of m. + */ + private static RealMatrix square(final RealMatrix m) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + double e = m.getEntry(r, c); + d[r][c] = e * e; + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix 1. + * @param n Input matrix 2. + * @return the matrix where the elements of m and n are element-wise multiplied. + */ + private static RealMatrix times(final RealMatrix m, final RealMatrix n) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + d[r][c] = m.getEntry(r, c) * n.getEntry(r, c); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix 1. + * @param n Input matrix 2. + * @return Matrix where the elements of m and n are element-wise divided. 
+ */ + private static RealMatrix divide(final RealMatrix m, final RealMatrix n) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + d[r][c] = m.getEntry(r, c) / n.getEntry(r, c); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @param cols Columns to select. + * @return Matrix representing the selected columns. + */ + private static RealMatrix selectColumns(final RealMatrix m, final int[] cols) { + final double[][] d = new double[m.getRowDimension()][cols.length]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < cols.length; c++) { + d[r][c] = m.getEntry(r, cols[c]); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @param k Diagonal position. + * @return Upper triangular part of matrix. + */ + private static RealMatrix triu(final RealMatrix m, int k) { + final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + d[r][c] = r <= c - k ? m.getEntry(r, c) : 0; + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @return Row matrix representing the sums of the rows. + */ + private static RealMatrix sumRows(final RealMatrix m) { + final double[][] d = new double[1][m.getColumnDimension()]; + for (int c = 0; c < m.getColumnDimension(); c++) { + double sum = 0; + for (int r = 0; r < m.getRowDimension(); r++) { + sum += m.getEntry(r, c); + } + d[0][c] = sum; + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @return the diagonal n-by-n matrix if m is a column matrix or the column + * matrix representing the diagonal if m is a n-by-n matrix. 
+ */ + private static RealMatrix diag(final RealMatrix m) { + if (m.getColumnDimension() == 1) { + final double[][] d = new double[m.getRowDimension()][m.getRowDimension()]; + for (int i = 0; i < m.getRowDimension(); i++) { + d[i][i] = m.getEntry(i, 0); + } + return new Array2DRowRealMatrix(d, false); + } else { + final double[][] d = new double[m.getRowDimension()][1]; + for (int i = 0; i < m.getColumnDimension(); i++) { + d[i][0] = m.getEntry(i, i); + } + return new Array2DRowRealMatrix(d, false); + } + } + + /** + * Copies a column from m1 to m2. + * + * @param m1 Source matrix. + * @param col1 Source column. + * @param m2 Target matrix. + * @param col2 Target column. + */ + private static void copyColumn(final RealMatrix m1, int col1, + RealMatrix m2, int col2) { + for (int i = 0; i < m1.getRowDimension(); i++) { + m2.setEntry(i, col2, m1.getEntry(i, col1)); + } + } + + /** + * @param n Number of rows. + * @param m Number of columns. + * @return n-by-m matrix filled with 1. + */ + private static RealMatrix ones(int n, int m) { + final double[][] d = new double[n][m]; + for (int r = 0; r < n; r++) { + Arrays.fill(d[r], 1); + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param n Number of rows. + * @param m Number of columns. + * @return n-by-m matrix of 0 values out of diagonal, and 1 values on + * the diagonal. + */ + private static RealMatrix eye(int n, int m) { + final double[][] d = new double[n][m]; + for (int r = 0; r < n; r++) { + if (r < m) { + d[r][r] = 1; + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param n Number of rows. + * @param m Number of columns. + * @return n-by-m matrix of zero values. + */ + private static RealMatrix zeros(int n, int m) { + return new Array2DRowRealMatrix(n, m); + } + + /** + * @param mat Input matrix. + * @param n Number of row replicates. + * @param m Number of column replicates. + * @return a matrix which replicates the input matrix in both directions. 
+ */ + private static RealMatrix repmat(final RealMatrix mat, int n, int m) { + final int rd = mat.getRowDimension(); + final int cd = mat.getColumnDimension(); + final double[][] d = new double[n * rd][m * cd]; + for (int r = 0; r < n * rd; r++) { + for (int c = 0; c < m * cd; c++) { + d[r][c] = mat.getEntry(r % rd, c % cd); + } + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param start Start value. + * @param end End value. + * @param step Step size. + * @return a sequence as column matrix. + */ + private static RealMatrix sequence(double start, double end, double step) { + final int size = (int) ((end - start) / step + 1); + final double[][] d = new double[size][1]; + double value = start; + for (int r = 0; r < size; r++) { + d[r][0] = value; + value += step; + } + return new Array2DRowRealMatrix(d, false); + } + + /** + * @param m Input matrix. + * @return the maximum of the matrix element values. + */ + private static double max(final RealMatrix m) { + double max = -Double.MAX_VALUE; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + double e = m.getEntry(r, c); + if (max < e) { + max = e; + } + } + } + return max; + } + + /** + * @param m Input matrix. + * @return the minimum of the matrix element values. + */ + private static double min(final RealMatrix m) { + double min = Double.MAX_VALUE; + for (int r = 0; r < m.getRowDimension(); r++) { + for (int c = 0; c < m.getColumnDimension(); c++) { + double e = m.getEntry(r, c); + if (min > e) { + min = e; + } + } + } + return min; + } + + /** + * @param m Input array. + * @return the maximum of the array values. + */ + private static double max(final double[] m) { + double max = -Double.MAX_VALUE; + for (int r = 0; r < m.length; r++) { + if (max < m[r]) { + max = m[r]; + } + } + return max; + } + + /** + * @param m Input array. + * @return the minimum of the array values. 
+ */ + private static double min(final double[] m) { + double min = Double.MAX_VALUE; + for (int r = 0; r < m.length; r++) { + if (min > m[r]) { + min = m[r]; + } + } + return min; + } + + /** + * @param indices Input index array. + * @return the inverse of the mapping defined by indices. + */ + private static int[] inverse(final int[] indices) { + final int[] inverse = new int[indices.length]; + for (int i = 0; i < indices.length; i++) { + inverse[indices[i]] = i; + } + return inverse; + } + + /** + * @param indices Input index array. + * @return the indices in inverse order (last is first). + */ + private static int[] reverse(final int[] indices) { + final int[] reverse = new int[indices.length]; + for (int i = 0; i < indices.length; i++) { + reverse[i] = indices[indices.length - i - 1]; + } + return reverse; + } + + /** + * @param size Length of random array. + * @return an array of Gaussian random numbers. + */ + private double[] randn(int size) { + final double[] randn = new double[size]; + for (int i = 0; i < size; i++) { + randn[i] = random.nextGaussian(); + } + return randn; + } + + /** + * @param size Number of rows. + * @param popSize Population size. + * @return a 2-dimensional matrix of Gaussian random numbers. 
+ */ + private RealMatrix randn1(int size, int popSize) { + final double[][] d = new double[size][popSize]; + for (int r = 0; r < size; r++) { + for (int c = 0; c < popSize; c++) { + d[r][c] = random.nextGaussian(); + } + } + return new Array2DRowRealMatrix(d, false); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/MultiDirectionalSimplex.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/MultiDirectionalSimplex.java new file mode 100644 index 0000000..7ee3acf --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/MultiDirectionalSimplex.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import java.util.Comparator; + +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.optim.PointValuePair; + +/** + * This class implements the multi-directional direct search method. + * + * @since 3.0 + */ +public class MultiDirectionalSimplex extends AbstractSimplex { + /** Default value for {@link #khi}: {@value}. 
*/ + private static final double DEFAULT_KHI = 2; + /** Default value for {@link #gamma}: {@value}. */ + private static final double DEFAULT_GAMMA = 0.5; + /** Expansion coefficient. */ + private final double khi; + /** Contraction coefficient. */ + private final double gamma; + + /** + * Build a multi-directional simplex with default coefficients. + * The default values are 2.0 for khi and 0.5 for gamma. + * + * @param n Dimension of the simplex. + */ + public MultiDirectionalSimplex(final int n) { + this(n, 1d); + } + + /** + * Build a multi-directional simplex with default coefficients. + * The default values are 2.0 for khi and 0.5 for gamma. + * + * @param n Dimension of the simplex. + * @param sideLength Length of the sides of the default (hypercube) + * simplex. See {@link AbstractSimplex#AbstractSimplex(int,double)}. + */ + public MultiDirectionalSimplex(final int n, double sideLength) { + this(n, sideLength, DEFAULT_KHI, DEFAULT_GAMMA); + } + + /** + * Build a multi-directional simplex with specified coefficients. + * + * @param n Dimension of the simplex. See + * {@link AbstractSimplex#AbstractSimplex(int,double)}. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + */ + public MultiDirectionalSimplex(final int n, + final double khi, final double gamma) { + this(n, 1d, khi, gamma); + } + + /** + * Build a multi-directional simplex with specified coefficients. + * + * @param n Dimension of the simplex. See + * {@link AbstractSimplex#AbstractSimplex(int,double)}. + * @param sideLength Length of the sides of the default (hypercube) + * simplex. See {@link AbstractSimplex#AbstractSimplex(int,double)}. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + */ + public MultiDirectionalSimplex(final int n, double sideLength, + final double khi, final double gamma) { + super(n, sideLength); + + this.khi = khi; + this.gamma = gamma; + } + + /** + * Build a multi-directional simplex with default coefficients. 
+ * The default values are 2.0 for khi and 0.5 for gamma. + * + * @param steps Steps along the canonical axes representing box edges. + * They may be negative but not zero. See + */ + public MultiDirectionalSimplex(final double[] steps) { + this(steps, DEFAULT_KHI, DEFAULT_GAMMA); + } + + /** + * Build a multi-directional simplex with specified coefficients. + * + * @param steps Steps along the canonical axes representing box edges. + * They may be negative but not zero. See + * {@link AbstractSimplex#AbstractSimplex(double[])}. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + */ + public MultiDirectionalSimplex(final double[] steps, + final double khi, final double gamma) { + super(steps); + + this.khi = khi; + this.gamma = gamma; + } + + /** + * Build a multi-directional simplex with default coefficients. + * The default values are 2.0 for khi and 0.5 for gamma. + * + * @param referenceSimplex Reference simplex. See + * {@link AbstractSimplex#AbstractSimplex(double[][])}. + */ + public MultiDirectionalSimplex(final double[][] referenceSimplex) { + this(referenceSimplex, DEFAULT_KHI, DEFAULT_GAMMA); + } + + /** + * Build a multi-directional simplex with specified coefficients. + * + * @param referenceSimplex Reference simplex. See + * {@link AbstractSimplex#AbstractSimplex(double[][])}. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + * @throws org.apache.commons.math3.exception.NotStrictlyPositiveException + * if the reference simplex does not contain at least one point. + * @throws org.apache.commons.math3.exception.DimensionMismatchException + * if there is a dimension mismatch in the reference simplex. 
+ */ + public MultiDirectionalSimplex(final double[][] referenceSimplex, + final double khi, final double gamma) { + super(referenceSimplex); + + this.khi = khi; + this.gamma = gamma; + } + + /** {@inheritDoc} */ + @Override + public void iterate(final MultivariateFunction evaluationFunction, + final Comparator<PointValuePair> comparator) { + // Save the original simplex. + final PointValuePair[] original = getPoints(); + final PointValuePair best = original[0]; + + // Perform a reflection step. + final PointValuePair reflected = evaluateNewSimplex(evaluationFunction, + original, 1, comparator); + if (comparator.compare(reflected, best) < 0) { + // Compute the expanded simplex. + final PointValuePair[] reflectedSimplex = getPoints(); + final PointValuePair expanded = evaluateNewSimplex(evaluationFunction, + original, khi, comparator); + if (comparator.compare(reflected, expanded) <= 0) { + // Keep the reflected simplex. + setPoints(reflectedSimplex); + } + // Keep the expanded simplex. + return; + } + + // Compute the contracted simplex. + evaluateNewSimplex(evaluationFunction, original, gamma, comparator); + + } + + /** + * Compute and evaluate a new simplex. + * + * @param evaluationFunction Evaluation function. + * @param original Original simplex (to be preserved). + * @param coeff Linear coefficient. + * @param comparator Comparator to use to sort simplex vertices from best + * to poorest. + * @return the best point in the transformed simplex. + * @throws org.apache.commons.math3.exception.TooManyEvaluationsException + * if the maximal number of evaluations is exceeded. + */ + private PointValuePair evaluateNewSimplex(final MultivariateFunction evaluationFunction, + final PointValuePair[] original, + final double coeff, + final Comparator<PointValuePair> comparator) { + final double[] xSmallest = original[0].getPointRef(); + // Perform a linear transformation on all the simplex points, + // except the first one. 
+ setPoint(0, original[0]); + final int dim = getDimension(); + for (int i = 1; i < getSize(); i++) { + final double[] xOriginal = original[i].getPointRef(); + final double[] xTransformed = new double[dim]; + for (int j = 0; j < dim; j++) { + xTransformed[j] = xSmallest[j] + coeff * (xSmallest[j] - xOriginal[j]); + } + setPoint(i, new PointValuePair(xTransformed, Double.NaN, false)); + } + + // Evaluate the simplex. + evaluate(evaluationFunction, comparator); + + return getPoint(0); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/NelderMeadSimplex.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/NelderMeadSimplex.java new file mode 100644 index 0000000..f7015ed --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/NelderMeadSimplex.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import java.util.Comparator; + +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.analysis.MultivariateFunction; + +/** + * This class implements the Nelder-Mead simplex algorithm. 
+ * + * @since 3.0 + */ +public class NelderMeadSimplex extends AbstractSimplex { + /** Default value for {@link #rho}: {@value}. */ + private static final double DEFAULT_RHO = 1; + /** Default value for {@link #khi}: {@value}. */ + private static final double DEFAULT_KHI = 2; + /** Default value for {@link #gamma}: {@value}. */ + private static final double DEFAULT_GAMMA = 0.5; + /** Default value for {@link #sigma}: {@value}. */ + private static final double DEFAULT_SIGMA = 0.5; + /** Reflection coefficient. */ + private final double rho; + /** Expansion coefficient. */ + private final double khi; + /** Contraction coefficient. */ + private final double gamma; + /** Shrinkage coefficient. */ + private final double sigma; + + /** + * Build a Nelder-Mead simplex with default coefficients. + * The default coefficients are 1.0 for rho, 2.0 for khi and 0.5 + * for both gamma and sigma. + * + * @param n Dimension of the simplex. + */ + public NelderMeadSimplex(final int n) { + this(n, 1d); + } + + /** + * Build a Nelder-Mead simplex with default coefficients. + * The default coefficients are 1.0 for rho, 2.0 for khi and 0.5 + * for both gamma and sigma. + * + * @param n Dimension of the simplex. + * @param sideLength Length of the sides of the default (hypercube) + * simplex. See {@link AbstractSimplex#AbstractSimplex(int,double)}. + */ + public NelderMeadSimplex(final int n, double sideLength) { + this(n, sideLength, + DEFAULT_RHO, DEFAULT_KHI, DEFAULT_GAMMA, DEFAULT_SIGMA); + } + + /** + * Build a Nelder-Mead simplex with specified coefficients. + * + * @param n Dimension of the simplex. See + * {@link AbstractSimplex#AbstractSimplex(int,double)}. + * @param sideLength Length of the sides of the default (hypercube) + * simplex. See {@link AbstractSimplex#AbstractSimplex(int,double)}. + * @param rho Reflection coefficient. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + * @param sigma Shrinkage coefficient. 
+ */ + public NelderMeadSimplex(final int n, double sideLength, + final double rho, final double khi, + final double gamma, final double sigma) { + super(n, sideLength); + + this.rho = rho; + this.khi = khi; + this.gamma = gamma; + this.sigma = sigma; + } + + /** + * Build a Nelder-Mead simplex with specified coefficients. + * + * @param n Dimension of the simplex. See + * {@link AbstractSimplex#AbstractSimplex(int)}. + * @param rho Reflection coefficient. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + * @param sigma Shrinkage coefficient. + */ + public NelderMeadSimplex(final int n, + final double rho, final double khi, + final double gamma, final double sigma) { + this(n, 1d, rho, khi, gamma, sigma); + } + + /** + * Build a Nelder-Mead simplex with default coefficients. + * The default coefficients are 1.0 for rho, 2.0 for khi and 0.5 + * for both gamma and sigma. + * + * @param steps Steps along the canonical axes representing box edges. + * They may be negative but not zero. See + */ + public NelderMeadSimplex(final double[] steps) { + this(steps, DEFAULT_RHO, DEFAULT_KHI, DEFAULT_GAMMA, DEFAULT_SIGMA); + } + + /** + * Build a Nelder-Mead simplex with specified coefficients. + * + * @param steps Steps along the canonical axes representing box edges. + * They may be negative but not zero. See + * {@link AbstractSimplex#AbstractSimplex(double[])}. + * @param rho Reflection coefficient. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + * @param sigma Shrinkage coefficient. + * @throws IllegalArgumentException if one of the steps is zero. + */ + public NelderMeadSimplex(final double[] steps, + final double rho, final double khi, + final double gamma, final double sigma) { + super(steps); + + this.rho = rho; + this.khi = khi; + this.gamma = gamma; + this.sigma = sigma; + } + + /** + * Build a Nelder-Mead simplex with default coefficients. 
+ * The default coefficients are 1.0 for rho, 2.0 for khi and 0.5 + * for both gamma and sigma. + * + * @param referenceSimplex Reference simplex. See + * {@link AbstractSimplex#AbstractSimplex(double[][])}. + */ + public NelderMeadSimplex(final double[][] referenceSimplex) { + this(referenceSimplex, DEFAULT_RHO, DEFAULT_KHI, DEFAULT_GAMMA, DEFAULT_SIGMA); + } + + /** + * Build a Nelder-Mead simplex with specified coefficients. + * + * @param referenceSimplex Reference simplex. See + * {@link AbstractSimplex#AbstractSimplex(double[][])}. + * @param rho Reflection coefficient. + * @param khi Expansion coefficient. + * @param gamma Contraction coefficient. + * @param sigma Shrinkage coefficient. + * @throws org.apache.commons.math3.exception.NotStrictlyPositiveException + * if the reference simplex does not contain at least one point. + * @throws org.apache.commons.math3.exception.DimensionMismatchException + * if there is a dimension mismatch in the reference simplex. + */ + public NelderMeadSimplex(final double[][] referenceSimplex, + final double rho, final double khi, + final double gamma, final double sigma) { + super(referenceSimplex); + + this.rho = rho; + this.khi = khi; + this.gamma = gamma; + this.sigma = sigma; + } + + /** {@inheritDoc} */ + @Override + public void iterate(final MultivariateFunction evaluationFunction, + final Comparator<PointValuePair> comparator) { + // The simplex has n + 1 points if dimension is n. + final int n = getDimension(); + + // Interesting values. + final PointValuePair best = getPoint(0); + final PointValuePair secondBest = getPoint(n - 1); + final PointValuePair worst = getPoint(n); + final double[] xWorst = worst.getPointRef(); + + // Compute the centroid of the best vertices (dismissing the worst + // point at index n). 
+ final double[] centroid = new double[n]; + for (int i = 0; i < n; i++) { + final double[] x = getPoint(i).getPointRef(); + for (int j = 0; j < n; j++) { + centroid[j] += x[j]; + } + } + final double scaling = 1.0 / n; + for (int j = 0; j < n; j++) { + centroid[j] *= scaling; + } + + // compute the reflection point + final double[] xR = new double[n]; + for (int j = 0; j < n; j++) { + xR[j] = centroid[j] + rho * (centroid[j] - xWorst[j]); + } + final PointValuePair reflected + = new PointValuePair(xR, evaluationFunction.value(xR), false); + + if (comparator.compare(best, reflected) <= 0 && + comparator.compare(reflected, secondBest) < 0) { + // Accept the reflected point. + replaceWorstPoint(reflected, comparator); + } else if (comparator.compare(reflected, best) < 0) { + // Compute the expansion point. + final double[] xE = new double[n]; + for (int j = 0; j < n; j++) { + xE[j] = centroid[j] + khi * (xR[j] - centroid[j]); + } + final PointValuePair expanded + = new PointValuePair(xE, evaluationFunction.value(xE), false); + + if (comparator.compare(expanded, reflected) < 0) { + // Accept the expansion point. + replaceWorstPoint(expanded, comparator); + } else { + // Accept the reflected point. + replaceWorstPoint(reflected, comparator); + } + } else { + if (comparator.compare(reflected, worst) < 0) { + // Perform an outside contraction. + final double[] xC = new double[n]; + for (int j = 0; j < n; j++) { + xC[j] = centroid[j] + gamma * (xR[j] - centroid[j]); + } + final PointValuePair outContracted + = new PointValuePair(xC, evaluationFunction.value(xC), false); + if (comparator.compare(outContracted, reflected) <= 0) { + // Accept the contraction point. + replaceWorstPoint(outContracted, comparator); + return; + } + } else { + // Perform an inside contraction. 
+ final double[] xC = new double[n]; + for (int j = 0; j < n; j++) { + xC[j] = centroid[j] - gamma * (centroid[j] - xWorst[j]); + } + final PointValuePair inContracted + = new PointValuePair(xC, evaluationFunction.value(xC), false); + + if (comparator.compare(inContracted, worst) < 0) { + // Accept the contraction point. + replaceWorstPoint(inContracted, comparator); + return; + } + } + + // Perform a shrink. + final double[] xSmallest = getPoint(0).getPointRef(); + for (int i = 1; i <= n; i++) { + final double[] x = getPoint(i).getPoint(); + for (int j = 0; j < n; j++) { + x[j] = xSmallest[j] + sigma * (x[j] - xSmallest[j]); + } + setPoint(i, new PointValuePair(x, Double.NaN, false)); + } + evaluate(evaluationFunction, comparator); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/PowellOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/PowellOptimizer.java new file mode 100644 index 0000000..afa8426 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/PowellOptimizer.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Powell's algorithm.
 * This code is translated and adapted from the Python version of this
 * algorithm (as implemented in module {@code optimize.py} v0.5 of
 * <em>SciPy</em>).
 * <br/>
 * The default stopping criterion is based on the differences of the
 * function value between two successive iterations. It is however possible
 * to define a custom convergence checker that might terminate the algorithm
 * earlier.
 * <br/>
 * Line search is performed by the {@link LineSearch} class.
 * <br/>
 * Constraints are not supported: the call to
 * {@link #optimize(OptimizationData[]) optimize} will throw
 * {@link MathUnsupportedOperationException} if bounds are passed to it.
 * In order to impose simple constraints, the objective function must be
 * wrapped in an adapter like
 * {@link org.apache.commons.math3.optim.nonlinear.scalar.MultivariateFunctionMappingAdapter
 * MultivariateFunctionMappingAdapter} or
 * {@link org.apache.commons.math3.optim.nonlinear.scalar.MultivariateFunctionPenaltyAdapter
 * MultivariateFunctionPenaltyAdapter}.
 *
 * @since 2.2
 */
public class PowellOptimizer
    extends MultivariateOptimizer {
    /**
     * Minimum relative tolerance; smaller values of the relative threshold
     * are rejected by the constructors.
     */
    private static final double MIN_RELATIVE_TOLERANCE = 2 * FastMath.ulp(1d);
    /**
     * Relative threshold used by the default convergence check.
     */
    private final double relativeThreshold;
    /**
     * Absolute threshold used by the default convergence check.
     */
    private final double absoluteThreshold;
    /**
     * Line search used along each search direction.
     */
    private final LineSearch line;

    /**
     * This constructor allows to specify a user-defined convergence checker,
     * in addition to the parameters that control the default convergence
     * checking procedure.
     * <br/>
     * The internal line search tolerances are set to the square-root of their
     * corresponding value in the multivariate optimizer.
     *
     * @param rel Relative threshold.
     * @param abs Absolute threshold.
     * @param checker Convergence checker.
     * @throws NotStrictlyPositiveException if {@code abs <= 0}.
     * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}.
     */
    public PowellOptimizer(double rel,
                           double abs,
                           ConvergenceChecker<PointValuePair> checker) {
        this(rel, abs, FastMath.sqrt(rel), FastMath.sqrt(abs), checker);
    }

    /**
     * This constructor allows to specify a user-defined convergence checker,
     * in addition to the parameters that control the default convergence
     * checking procedure and the line search tolerances.
     *
     * @param rel Relative threshold for this optimizer.
     * @param abs Absolute threshold for this optimizer.
     * @param lineRel Relative threshold for the internal line search optimizer.
     * @param lineAbs Absolute threshold for the internal line search optimizer.
     * @param checker Convergence checker.
     * @throws NotStrictlyPositiveException if {@code abs <= 0}.
     * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}.
     */
    public PowellOptimizer(double rel,
                           double abs,
                           double lineRel,
                           double lineAbs,
                           ConvergenceChecker<PointValuePair> checker) {
        super(checker);

        // Validate the thresholds before storing them.
        if (rel < MIN_RELATIVE_TOLERANCE) {
            throw new NumberIsTooSmallException(rel, MIN_RELATIVE_TOLERANCE, true);
        }
        if (abs <= 0) {
            throw new NotStrictlyPositiveException(abs);
        }
        relativeThreshold = rel;
        absoluteThreshold = abs;

        // Create the line search optimizer.
        // NOTE(review): the last argument (1d) is presumably the initial
        // bracketing range of the line search; confirm against LineSearch.
        line = new LineSearch(this,
                              lineRel,
                              lineAbs,
                              1d);
    }

    /**
     * Builds an instance without a user-defined convergence checker
     * ({@code null} is passed as the checker), so only the default
     * convergence checking procedure controlled by the parameters is used.
     * <br/>
     * The internal line search tolerances are set to the square-root of their
     * corresponding value in the multivariate optimizer.
     *
     * @param rel Relative threshold.
     * @param abs Absolute threshold.
     * @throws NotStrictlyPositiveException if {@code abs <= 0}.
     * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}.
     */
    public PowellOptimizer(double rel,
                           double abs) {
        this(rel, abs, null);
    }

    /**
     * Builds an instance with the default convergence checking procedure
     * ({@code null} is passed as the checker) and explicit line search
     * tolerances.
     *
     * @param rel Relative threshold.
     * @param abs Absolute threshold.
     * @param lineRel Relative threshold for the internal line search optimizer.
     * @param lineAbs Absolute threshold for the internal line search optimizer.
     * @throws NotStrictlyPositiveException if {@code abs <= 0}.
     * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}.
     */
    public PowellOptimizer(double rel,
                           double abs,
                           double lineRel,
                           double lineAbs) {
        this(rel, abs, lineRel, lineAbs, null);
    }

    /** {@inheritDoc} */
    @Override
    protected PointValuePair doOptimize() {
        checkParameters();

        final GoalType goal = getGoalType();
        final double[] guess = getStartPoint();
        final int n = guess.length;

        // Initial search directions: the canonical axes (identity matrix).
        final double[][] direc = new double[n][n];
        for (int i = 0; i < n; i++) {
            direc[i][i] = 1;
        }

        final ConvergenceChecker<PointValuePair> checker
            = getConvergenceChecker();

        double[] x = guess;
        double fVal = computeObjectiveValue(x);
        // x1 keeps the point at the start of the current sweep.
        double[] x1 = x.clone();
        while (true) {
            incrementIterationCount();

            // fX: objective value at the start of this sweep.
            double fX = fVal;
            // fX2: value before each line search; reused further below for
            // the value at the extrapolated point.
            double fX2 = 0;
            // delta / bigInd: largest (fX2 - fVal) seen in this sweep and
            // the index of the direction that produced it.
            double delta = 0;
            int bigInd = 0;
            double alphaMin = 0;

            // One line search along each current direction.
            for (int i = 0; i < n; i++) {
                final double[] d = MathArrays.copyOf(direc[i]);

                fX2 = fVal;

                final UnivariatePointValuePair optimum = line.search(x, d);
                fVal = optimum.getValue();
                alphaMin = optimum.getPoint();
                final double[][] result = newPointAndDirection(x, d, alphaMin);
                x = result[0];

                if ((fX2 - fVal) > delta) {
                    delta = fX2 - fVal;
                    bigInd = i;
                }
            }

            // Default convergence check.
            // NOTE(review): this test (like the "delta" bookkeeping above)
            // uses fX - fVal without looking at the goal type; when
            // maximizing, an improving sweep would give fX - fVal <= 0 and
            // the test would pass immediately. Confirm this is intended.
            boolean stop = 2 * (fX - fVal) <=
                (relativeThreshold * (FastMath.abs(fX) + FastMath.abs(fVal)) +
                 absoluteThreshold);

            final PointValuePair previous = new PointValuePair(x1, fX);
            final PointValuePair current = new PointValuePair(x, fVal);
            if (!stop && checker != null) { // User-defined stopping criteria.
                stop = checker.converged(getIterations(), previous, current);
            }
            if (stop) {
                // Return whichever of the two points is better for the goal.
                if (goal == GoalType.MINIMIZE) {
                    return (fVal < fX) ? current : previous;
                } else {
                    return (fVal > fX) ? current : previous;
                }
            }

            // d: overall displacement of this sweep;
            // x2: point extrapolated by the same displacement beyond x.
            final double[] d = new double[n];
            final double[] x2 = new double[n];
            for (int i = 0; i < n; i++) {
                d[i] = x[i] - x1[i];
                x2[i] = 2 * x[i] - x1[i];
            }

            x1 = x.clone();
            fX2 = computeObjectiveValue(x2);

            if (fX > fX2) {
                // t = 2 (fX + fX2 - 2 fVal) (fX - fVal - delta)^2
                //     - delta (fX - fX2)^2
                double t = 2 * (fX + fX2 - 2 * fVal);
                double temp = fX - fVal - delta;
                t *= temp * temp;
                temp = fX - fX2;
                t -= delta * temp * temp;

                if (t < 0.0) {
                    // Search along the sweep displacement, then drop the
                    // direction at bigInd (overwritten by the last one) and
                    // store the new direction in the last slot.
                    final UnivariatePointValuePair optimum = line.search(x, d);
                    fVal = optimum.getValue();
                    alphaMin = optimum.getPoint();
                    final double[][] result = newPointAndDirection(x, d, alphaMin);
                    x = result[0];

                    final int lastInd = n - 1;
                    direc[bigInd] = direc[lastInd];
                    direc[lastInd] = result[1];
                }
            }
        }
    }

    /**
     * Compute a new point (in the original space) and a new direction
     * vector, resulting from the line search.
     * The new direction is {@code d} scaled by {@code optimum}; the new
     * point is {@code p} plus that scaled direction. The inputs are not
     * modified.
     *
     * @param p Point used in the line search.
     * @param d Direction used in the line search.
     * @param optimum Optimum found by the line search.
     * @return a 2-element array containing the new point (at index 0) and
     * the new direction (at index 1).
     */
    private double[][] newPointAndDirection(double[] p,
                                            double[] d,
                                            double optimum) {
        final int n = p.length;
        final double[] nP = new double[n];
        final double[] nD = new double[n];
        for (int i = 0; i < n; i++) {
            nD[i] = d[i] * optimum;
            nP[i] = p[i] + nD[i];
        }

        final double[][] result = new double[2][];
        result[0] = nP;
        result[1] = nD;

        return result;
    }

    /**
     * Rejects problems that define a lower or an upper bound.
     *
     * @throws MathUnsupportedOperationException if bounds were passed to the
     * {@link #optimize(OptimizationData[]) optimize} method.
     */
    private void checkParameters() {
        if (getLowerBound() != null ||
            getUpperBound() != null) {
            throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT);
        }
    }
}
+ */ +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; + +import java.util.Comparator; +import org.apache.commons.math3.analysis.MultivariateFunction; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.MathUnsupportedOperationException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.PointValuePair; +import org.apache.commons.math3.optim.SimpleValueChecker; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.nonlinear.scalar.MultivariateOptimizer; + +/** + * This class implements simplex-based direct search optimization. + * + * <p> + * Direct search methods only use objective function values, they do + * not need derivatives and don't either try to compute approximation + * of the derivatives. According to a 1996 paper by Margaret H. Wright + * (<a href="http://cm.bell-labs.com/cm/cs/doc/96/4-02.ps.gz">Direct + * Search Methods: Once Scorned, Now Respectable</a>), they are used + * when either the computation of the derivative is impossible (noisy + * functions, unpredictable discontinuities) or difficult (complexity, + * computation cost). In the first cases, rather than an optimum, a + * <em>not too bad</em> point is desired. In the latter cases, an + * optimum is desired but cannot be reasonably found. In all cases + * direct search methods can be useful. + * </p> + * <p> + * Simplex-based direct search methods are based on comparison of + * the objective function values at the vertices of a simplex (which is a + * set of n+1 points in dimension n) that is updated by the algorithms + * steps. + * <p> + * <p> + * The simplex update procedure ({@link NelderMeadSimplex} or + * {@link MultiDirectionalSimplex}) must be passed to the + * {@code optimize} method. 
+ * </p> + * <p> + * Each call to {@code optimize} will re-use the start configuration of + * the current simplex and move it such that its first vertex is at the + * provided start point of the optimization. + * If the {@code optimize} method is called to solve a different problem + * and the number of parameters change, the simplex must be re-initialized + * to one with the appropriate dimensions. + * </p> + * <p> + * Convergence is checked by providing the <em>worst</em> points of + * previous and current simplex to the convergence checker, not the best + * ones. + * </p> + * <p> + * This simplex optimizer implementation does not directly support constrained + * optimization with simple bounds; so, for such optimizations, either a more + * dedicated algorithm must be used like + * {@link CMAESOptimizer} or {@link BOBYQAOptimizer}, or the objective + * function must be wrapped in an adapter like + * {@link org.apache.commons.math3.optim.nonlinear.scalar.MultivariateFunctionMappingAdapter + * MultivariateFunctionMappingAdapter} or + * {@link org.apache.commons.math3.optim.nonlinear.scalar.MultivariateFunctionPenaltyAdapter + * MultivariateFunctionPenaltyAdapter}. + * <br/> + * The call to {@link #optimize(OptimizationData[]) optimize} will throw + * {@link MathUnsupportedOperationException} if bounds are passed to it. + * </p> + * + * @since 3.0 + */ +public class SimplexOptimizer extends MultivariateOptimizer { + /** Simplex update rule. */ + private AbstractSimplex simplex; + + /** + * @param checker Convergence checker. + */ + public SimplexOptimizer(ConvergenceChecker<PointValuePair> checker) { + super(checker); + } + + /** + * @param rel Relative threshold. + * @param abs Absolute threshold. + */ + public SimplexOptimizer(double rel, double abs) { + this(new SimpleValueChecker(rel, abs)); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. 
In addition to those documented in + * {@link MultivariateOptimizer#parseOptimizationData(OptimizationData[]) + * MultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link AbstractSimplex}</li> + * </ul> + * @return {@inheritDoc} + */ + @Override + public PointValuePair optimize(OptimizationData... optData) { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** {@inheritDoc} */ + @Override + protected PointValuePair doOptimize() { + checkParameters(); + + // Indirect call to "computeObjectiveValue" in order to update the + // evaluations counter. + final MultivariateFunction evalFunc + = new MultivariateFunction() { + /** {@inheritDoc} */ + public double value(double[] point) { + return computeObjectiveValue(point); + } + }; + + final boolean isMinim = getGoalType() == GoalType.MINIMIZE; + final Comparator<PointValuePair> comparator + = new Comparator<PointValuePair>() { + /** {@inheritDoc} */ + public int compare(final PointValuePair o1, + final PointValuePair o2) { + final double v1 = o1.getValue(); + final double v2 = o2.getValue(); + return isMinim ? Double.compare(v1, v2) : Double.compare(v2, v1); + } + }; + + // Initialize search. + simplex.build(getStartPoint()); + simplex.evaluate(evalFunc, comparator); + + PointValuePair[] previous = null; + int iteration = 0; + final ConvergenceChecker<PointValuePair> checker = getConvergenceChecker(); + while (true) { + if (getIterations() > 0) { + boolean converged = true; + for (int i = 0; i < simplex.getSize(); i++) { + PointValuePair prev = previous[i]; + converged = converged && + checker.converged(iteration, prev, simplex.getPoint(i)); + } + if (converged) { + // We have found an optimum. + return simplex.getPoint(0); + } + } + + // We still need to search. 
+ previous = simplex.getPoints(); + simplex.iterate(evalFunc, comparator); + + incrementIterationCount(); + } + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. + * The following data will be looked for: + * <ul> + * <li>{@link AbstractSimplex}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof AbstractSimplex) { + simplex = (AbstractSimplex) data; + // If more data must be parsed, this statement _must_ be + // changed to "continue". + break; + } + } + } + + /** + * @throws MathUnsupportedOperationException if bounds were passed to the + * {@link #optimize(OptimizationData[]) optimize} method. + * @throws NullArgumentException if no initial simplex was passed to the + * {@link #optimize(OptimizationData[]) optimize} method. + */ + private void checkParameters() { + if (simplex == null) { + throw new NullArgumentException(); + } + if (getLowerBound() != null || + getUpperBound() != null) { + throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/package-info.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/package-info.java new file mode 100644 index 0000000..4afeb50 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/noderiv/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides optimization algorithms that do not require derivatives. + */ +package org.apache.commons.math3.optim.nonlinear.scalar.noderiv; diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/package-info.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/package-info.java new file mode 100644 index 0000000..d65533a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/scalar/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Algorithms for optimizing a scalar function. 
+ */ +package org.apache.commons.math3.optim.nonlinear.scalar; diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/JacobianMultivariateVectorOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/JacobianMultivariateVectorOptimizer.java new file mode 100644 index 0000000..52372c8 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/JacobianMultivariateVectorOptimizer.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.analysis.MultivariateMatrixFunction; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.PointVectorValuePair; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.DimensionMismatchException; + +/** + * Base class for implementing optimizers for multivariate vector + * differentiable functions. + * It contains boiler-plate code for dealing with Jacobian evaluation. 
+ * It assumes that the rows of the Jacobian matrix iterate on the model + * functions while the columns iterate on the parameters; thus, the numbers + * of rows is equal to the dimension of the {@link Target} while the + * number of columns is equal to the dimension of the + * {@link org.apache.commons.math3.optim.InitialGuess InitialGuess}. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public abstract class JacobianMultivariateVectorOptimizer + extends MultivariateVectorOptimizer { + /** + * Jacobian of the model function. + */ + private MultivariateMatrixFunction jacobian; + + /** + * @param checker Convergence checker. + */ + protected JacobianMultivariateVectorOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { + super(checker); + } + + /** + * Computes the Jacobian matrix. + * + * @param params Point at which the Jacobian must be evaluated. + * @return the Jacobian at the specified point. + */ + protected double[][] computeJacobian(final double[] params) { + return jacobian.value(params); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link MultivariateVectorOptimizer#optimize(OptimizationData...)} + * MultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link ModelFunctionJacobian}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + * @throws DimensionMismatchException if the initial guess, target, and weight + * arguments have inconsistent dimensions. + */ + @Override + public PointVectorValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException, + DimensionMismatchException { + // Set up base class and perform computation. 
+ return super.optimize(optData); + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. + * The following data will be looked for: + * <ul> + * <li>{@link ModelFunctionJacobian}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof ModelFunctionJacobian) { + jacobian = ((ModelFunctionJacobian) data).getModelFunctionJacobian(); + // If more data must be parsed, this statement _must_ be + // changed to "continue". + break; + } + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunction.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunction.java new file mode 100644 index 0000000..73de7d6 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunction.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Model (vector) function to be optimized. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class ModelFunction implements OptimizationData { + /** Function to be optimized. */ + private final MultivariateVectorFunction model; + + /** + * @param m Model function to be optimized. + */ + public ModelFunction(MultivariateVectorFunction m) { + model = m; + } + + /** + * Gets the model function to be optimized. + * + * @return the model function. + */ + public MultivariateVectorFunction getModelFunction() { + return model; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunctionJacobian.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunctionJacobian.java new file mode 100644 index 0000000..72ea4ae --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/ModelFunctionJacobian.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.analysis.MultivariateMatrixFunction; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Jacobian of the model (vector) function to be optimized. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class ModelFunctionJacobian implements OptimizationData { + /** Function to be optimized. */ + private final MultivariateMatrixFunction jacobian; + + /** + * @param j Jacobian of the model function to be optimized. + */ + public ModelFunctionJacobian(MultivariateMatrixFunction j) { + jacobian = j; + } + + /** + * Gets the Jacobian of the model function to be optimized. + * + * @return the model function Jacobian. + */ + public MultivariateMatrixFunction getModelFunctionJacobian() { + return jacobian; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultiStartMultivariateVectorOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultiStartMultivariateVectorOptimizer.java new file mode 100644 index 0000000..2cebf79 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultiStartMultivariateVectorOptimizer.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import java.util.Collections; +import java.util.List; +import java.util.ArrayList; +import java.util.Comparator; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.RealVector; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.random.RandomVectorGenerator; +import org.apache.commons.math3.optim.BaseMultiStartMultivariateOptimizer; +import org.apache.commons.math3.optim.PointVectorValuePair; + +/** + * Multi-start optimizer for a (vector) model function. + * + * This class wraps an optimizer in order to use it several times in + * turn with different starting points (trying to avoid being trapped + * in a local extremum when looking for a global one). + * + * @since 3.0 + */ +@Deprecated +public class MultiStartMultivariateVectorOptimizer + extends BaseMultiStartMultivariateOptimizer<PointVectorValuePair> { + /** Underlying optimizer. */ + private final MultivariateVectorOptimizer optimizer; + /** Found optima. 
*/ + private final List<PointVectorValuePair> optima = new ArrayList<PointVectorValuePair>(); + + /** + * Create a multi-start optimizer from a single-start optimizer. + * + * @param optimizer Single-start optimizer to wrap. + * @param starts Number of starts to perform. + * If {@code starts == 1}, the result will be same as if {@code optimizer} + * is called directly. + * @param generator Random vector generator to use for restarts. + * @throws NullArgumentException if {@code optimizer} or {@code generator} + * is {@code null}. + * @throws NotStrictlyPositiveException if {@code starts < 1}. + */ + public MultiStartMultivariateVectorOptimizer(final MultivariateVectorOptimizer optimizer, + final int starts, + final RandomVectorGenerator generator) + throws NullArgumentException, + NotStrictlyPositiveException { + super(optimizer, starts, generator); + this.optimizer = optimizer; + } + + /** + * {@inheritDoc} + */ + @Override + public PointVectorValuePair[] getOptima() { + Collections.sort(optima, getPairComparator()); + return optima.toArray(new PointVectorValuePair[0]); + } + + /** + * {@inheritDoc} + */ + @Override + protected void store(PointVectorValuePair optimum) { + optima.add(optimum); + } + + /** + * {@inheritDoc} + */ + @Override + protected void clear() { + optima.clear(); + } + + /** + * @return a comparator for sorting the optima. + */ + private Comparator<PointVectorValuePair> getPairComparator() { + return new Comparator<PointVectorValuePair>() { + /** Observed value to be matched. */ + private final RealVector target = new ArrayRealVector(optimizer.getTarget(), false); + /** Observations weights. */ + private final RealMatrix weight = optimizer.getWeight(); + + /** {@inheritDoc} */ + public int compare(final PointVectorValuePair o1, + final PointVectorValuePair o2) { + if (o1 == null) { + return (o2 == null) ? 
0 : 1; + } else if (o2 == null) { + return -1; + } + return Double.compare(weightedResidual(o1), + weightedResidual(o2)); + } + + private double weightedResidual(final PointVectorValuePair pv) { + final RealVector v = new ArrayRealVector(pv.getValueRef(), false); + final RealVector r = target.subtract(v); + return r.dotProduct(weight.operate(r)); + } + }; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultivariateVectorOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultivariateVectorOptimizer.java new file mode 100644 index 0000000..c79defa --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/MultivariateVectorOptimizer.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.analysis.MultivariateVectorFunction; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.BaseMultivariateOptimizer; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.PointVectorValuePair; +import org.apache.commons.math3.linear.RealMatrix; + +/** + * Base class for a multivariate vector function optimizer. + * + * @since 3.1 + */ +@Deprecated +public abstract class MultivariateVectorOptimizer + extends BaseMultivariateOptimizer<PointVectorValuePair> { + /** Target values for the model function at optimum. */ + private double[] target; + /** Weight matrix. */ + private RealMatrix weightMatrix; + /** Model function. */ + private MultivariateVectorFunction model; + + /** + * @param checker Convergence checker. + */ + protected MultivariateVectorOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { + super(checker); + } + + /** + * Computes the objective function value. + * This method <em>must</em> be called by subclasses to enforce the + * evaluation counter limit. + * + * @param params Point at which the objective function must be evaluated. + * @return the objective function value at the specified point. + * @throws TooManyEvaluationsException if the maximal number of evaluations + * (of the model vector function) is exceeded. + */ + protected double[] computeObjectiveValue(double[] params) { + super.incrementEvaluationCount(); + return model.value(params); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. 
In addition to those documented in + * {@link BaseMultivariateOptimizer#parseOptimizationData(OptimizationData[]) + * BaseMultivariateOptimizer}, this method will register the following data: + * <ul> + * <li>{@link Target}</li> + * <li>{@link Weight}</li> + * <li>{@link ModelFunction}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + * @throws DimensionMismatchException if the initial guess, target, and weight + * arguments have inconsistent dimensions. + */ + @Override + public PointVectorValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException, + DimensionMismatchException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** + * Gets the weight matrix of the observations. + * + * @return the weight matrix. + */ + public RealMatrix getWeight() { + return weightMatrix.copy(); + } + /** + * Gets the observed values to be matched by the objective vector + * function. + * + * @return the target values. + */ + public double[] getTarget() { + return target.clone(); + } + + /** + * Gets the number of observed values. + * + * @return the length of the target vector. + */ + public int getTargetSize() { + return target.length; + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. The following data will be looked for: + * <ul> + * <li>{@link Target}</li> + * <li>{@link Weight}</li> + * <li>{@link ModelFunction}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. 
+ for (OptimizationData data : optData) { + if (data instanceof ModelFunction) { + model = ((ModelFunction) data).getModelFunction(); + continue; + } + if (data instanceof Target) { + target = ((Target) data).getTarget(); + continue; + } + if (data instanceof Weight) { + weightMatrix = ((Weight) data).getWeight(); + continue; + } + } + + // Check input consistency. + checkParameters(); + } + + /** + * Check parameters consistency. + * + * @throws DimensionMismatchException if {@link #target} and + * {@link #weightMatrix} have inconsistent dimensions. + */ + private void checkParameters() { + if (target.length != weightMatrix.getColumnDimension()) { + throw new DimensionMismatchException(target.length, + weightMatrix.getColumnDimension()); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Target.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Target.java new file mode 100644 index 0000000..cd387d5 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Target.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Target of the optimization procedure. + * They are the values which the objective vector function must reproduce + * When the parameters of the model have been optimized. + * <br/> + * Immutable class. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class Target implements OptimizationData { + /** Target values (of the objective vector function). */ + private final double[] target; + + /** + * @param observations Target values. + */ + public Target(double[] observations) { + target = observations.clone(); + } + + /** + * Gets the initial guess. + * + * @return the initial guess. + */ + public double[] getTarget() { + return target.clone(); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Weight.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Weight.java new file mode 100644 index 0000000..4d51cd7 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/Weight.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.vector; + +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.DiagonalMatrix; +import org.apache.commons.math3.linear.NonSquareMatrixException; + +/** + * Weight matrix of the residuals between model and observations. + * <br/> + * Immutable class. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class Weight implements OptimizationData { + /** Weight matrix. */ + private final RealMatrix weightMatrix; + + /** + * Creates a diagonal weight matrix. + * + * @param weight List of the values of the diagonal. + */ + public Weight(double[] weight) { + weightMatrix = new DiagonalMatrix(weight); + } + + /** + * @param weight Weight matrix. + * @throws NonSquareMatrixException if the argument is not + * a square matrix. + */ + public Weight(RealMatrix weight) { + if (weight.getColumnDimension() != weight.getRowDimension()) { + throw new NonSquareMatrixException(weight.getColumnDimension(), + weight.getRowDimension()); + } + + weightMatrix = weight.copy(); + } + + /** + * Gets the initial guess. + * + * @return the initial guess. 
+ */ + public RealMatrix getWeight() { + return weightMatrix.copy(); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/AbstractLeastSquaresOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/AbstractLeastSquaresOptimizer.java new file mode 100644 index 0000000..67682eb --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/AbstractLeastSquaresOptimizer.java @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.vector.jacobian; + +import org.apache.commons.math3.exception.DimensionMismatchException; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.DiagonalMatrix; +import org.apache.commons.math3.linear.DecompositionSolver; +import org.apache.commons.math3.linear.MatrixUtils; +import org.apache.commons.math3.linear.QRDecomposition; +import org.apache.commons.math3.linear.EigenDecomposition; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.PointVectorValuePair; +import org.apache.commons.math3.optim.nonlinear.vector.Weight; +import org.apache.commons.math3.optim.nonlinear.vector.JacobianMultivariateVectorOptimizer; +import org.apache.commons.math3.util.FastMath; + +/** + * Base class for implementing least-squares optimizers. + * It provides methods for error estimation. + * + * @since 3.1 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public abstract class AbstractLeastSquaresOptimizer + extends JacobianMultivariateVectorOptimizer { + /** Square-root of the weight matrix. */ + private RealMatrix weightMatrixSqrt; + /** Cost value (square root of the sum of the residuals). */ + private double cost; + + /** + * @param checker Convergence checker. + */ + protected AbstractLeastSquaresOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { + super(checker); + } + + /** + * Computes the weighted Jacobian matrix. + * + * @param params Model parameters at which to compute the Jacobian. + * @return the weighted Jacobian: W<sup>1/2</sup> J. 
+ * @throws DimensionMismatchException if the Jacobian dimension does not + * match problem dimension. + */ + protected RealMatrix computeWeightedJacobian(double[] params) { + return weightMatrixSqrt.multiply(MatrixUtils.createRealMatrix(computeJacobian(params))); + } + + /** + * Computes the cost. + * + * @param residuals Residuals. + * @return the cost. + * @see #computeResiduals(double[]) + */ + protected double computeCost(double[] residuals) { + final ArrayRealVector r = new ArrayRealVector(residuals); + return FastMath.sqrt(r.dotProduct(getWeight().operate(r))); + } + + /** + * Gets the root-mean-square (RMS) value. + * + * The RMS the root of the arithmetic mean of the square of all weighted + * residuals. + * This is related to the criterion that is minimized by the optimizer + * as follows: If <em>c</em> if the criterion, and <em>n</em> is the + * number of measurements, then the RMS is <em>sqrt (c/n)</em>. + * + * @return the RMS value. + */ + public double getRMS() { + return FastMath.sqrt(getChiSquare() / getTargetSize()); + } + + /** + * Get a Chi-Square-like value assuming the N residuals follow N + * distinct normal distributions centered on 0 and whose variances are + * the reciprocal of the weights. + * @return chi-square value + */ + public double getChiSquare() { + return cost * cost; + } + + /** + * Gets the square-root of the weight matrix. + * + * @return the square-root of the weight matrix. + */ + public RealMatrix getWeightSquareRoot() { + return weightMatrixSqrt.copy(); + } + + /** + * Sets the cost. + * + * @param cost Cost value. + */ + protected void setCost(double cost) { + this.cost = cost; + } + + /** + * Get the covariance matrix of the optimized parameters. + * <br/> + * Note that this operation involves the inversion of the + * <code>J<sup>T</sup>J</code> matrix, where {@code J} is the + * Jacobian matrix. 
+ * The {@code threshold} parameter is a way for the caller to specify + * that the result of this computation should be considered meaningless, + * and thus trigger an exception. + * + * @param params Model parameters. + * @param threshold Singularity threshold. + * @return the covariance matrix. + * @throws org.apache.commons.math3.linear.SingularMatrixException + * if the covariance matrix cannot be computed (singular problem). + */ + public double[][] computeCovariances(double[] params, + double threshold) { + // Set up the Jacobian. + final RealMatrix j = computeWeightedJacobian(params); + + // Compute transpose(J)J. + final RealMatrix jTj = j.transpose().multiply(j); + + // Compute the covariances matrix. + final DecompositionSolver solver + = new QRDecomposition(jTj, threshold).getSolver(); + return solver.getInverse().getData(); + } + + /** + * Computes an estimate of the standard deviation of the parameters. The + * returned values are the square root of the diagonal coefficients of the + * covariance matrix, {@code sd(a[i]) ~= sqrt(C[i][i])}, where {@code a[i]} + * is the optimized value of the {@code i}-th parameter, and {@code C} is + * the covariance matrix. + * + * @param params Model parameters. + * @param covarianceSingularityThreshold Singularity threshold (see + * {@link #computeCovariances(double[],double) computeCovariances}). + * @return an estimate of the standard deviation of the optimized parameters + * @throws org.apache.commons.math3.linear.SingularMatrixException + * if the covariance matrix cannot be computed. + */ + public double[] computeSigma(double[] params, + double covarianceSingularityThreshold) { + final int nC = params.length; + final double[] sig = new double[nC]; + final double[][] cov = computeCovariances(params, covarianceSingularityThreshold); + for (int i = 0; i < nC; ++i) { + sig[i] = FastMath.sqrt(cov[i][i]); + } + return sig; + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. 
In addition to those documented in + * {@link JacobianMultivariateVectorOptimizer#parseOptimizationData(OptimizationData[]) + * JacobianMultivariateVectorOptimizer}, this method will register the following data: + * <ul> + * <li>{@link org.apache.commons.math3.optim.nonlinear.vector.Weight}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + * @throws DimensionMismatchException if the initial guess, target, and weight + * arguments have inconsistent dimensions. + */ + @Override + public PointVectorValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException { + // Set up base class and perform computation. + return super.optimize(optData); + } + + /** + * Computes the residuals. + * The residual is the difference between the observed (target) + * values and the model (objective function) value. + * There is one residual for each element of the vector-valued + * function. + * + * @param objectiveValue Value of the the objective function. This is + * the value returned from a call to + * {@link #computeObjectiveValue(double[]) computeObjectiveValue} + * (whose array argument contains the model parameters). + * @return the residuals. + * @throws DimensionMismatchException if {@code params} has a wrong + * length. + */ + protected double[] computeResiduals(double[] objectiveValue) { + final double[] target = getTarget(); + if (objectiveValue.length != target.length) { + throw new DimensionMismatchException(target.length, + objectiveValue.length); + } + + final double[] residuals = new double[target.length]; + for (int i = 0; i < target.length; i++) { + residuals[i] = target[i] - objectiveValue[i]; + } + + return residuals; + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * If the weight matrix is specified, the {@link #weightMatrixSqrt} + * field is recomputed. 
+ * + * @param optData Optimization data. The following data will be looked for: + * <ul> + * <li>{@link Weight}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof Weight) { + weightMatrixSqrt = squareRoot(((Weight) data).getWeight()); + // If more data must be parsed, this statement _must_ be + // changed to "continue". + break; + } + } + } + + /** + * Computes the square-root of the weight matrix. + * + * @param m Symmetric, positive-definite (weight) matrix. + * @return the square-root of the weight matrix. + */ + private RealMatrix squareRoot(RealMatrix m) { + if (m instanceof DiagonalMatrix) { + final int dim = m.getRowDimension(); + final RealMatrix sqrtM = new DiagonalMatrix(dim); + for (int i = 0; i < dim; i++) { + sqrtM.setEntry(i, i, FastMath.sqrt(m.getEntry(i, i))); + } + return sqrtM; + } else { + final EigenDecomposition dec = new EigenDecomposition(m); + return dec.getSquareRoot(); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/GaussNewtonOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/GaussNewtonOptimizer.java new file mode 100644 index 0000000..0668475 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/GaussNewtonOptimizer.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.nonlinear.vector.jacobian; + +import org.apache.commons.math3.exception.ConvergenceException; +import org.apache.commons.math3.exception.NullArgumentException; +import org.apache.commons.math3.exception.MathInternalError; +import org.apache.commons.math3.exception.MathUnsupportedOperationException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.linear.ArrayRealVector; +import org.apache.commons.math3.linear.BlockRealMatrix; +import org.apache.commons.math3.linear.DecompositionSolver; +import org.apache.commons.math3.linear.LUDecomposition; +import org.apache.commons.math3.linear.QRDecomposition; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.linear.SingularMatrixException; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.PointVectorValuePair; + +/** + * Gauss-Newton least-squares solver. + * <br/> + * Constraints are not supported: the call to + * {@link #optimize(OptimizationData[]) optimize} will throw + * {@link MathUnsupportedOperationException} if bounds are passed to it. + * + * <p> + * This class solve a least-square problem by solving the normal equations + * of the linearized problem at each iteration. Either LU decomposition or + * QR decomposition can be used to solve the normal equations. 
LU decomposition + * is faster but QR decomposition is more robust for difficult problems. + * </p> + * + * @since 2.0 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class GaussNewtonOptimizer extends AbstractLeastSquaresOptimizer { + /** Indicator for using LU decomposition. */ + private final boolean useLU; + + /** + * Simple constructor with default settings. + * The normal equations will be solved using LU decomposition. + * + * @param checker Convergence checker. + */ + public GaussNewtonOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { + this(true, checker); + } + + /** + * @param useLU If {@code true}, the normal equations will be solved + * using LU decomposition, otherwise they will be solved using QR + * decomposition. + * @param checker Convergence checker. + */ + public GaussNewtonOptimizer(final boolean useLU, + ConvergenceChecker<PointVectorValuePair> checker) { + super(checker); + this.useLU = useLU; + } + + /** {@inheritDoc} */ + @Override + public PointVectorValuePair doOptimize() { + checkParameters(); + + final ConvergenceChecker<PointVectorValuePair> checker + = getConvergenceChecker(); + + // Computation will be useless without a checker (see "for-loop"). + if (checker == null) { + throw new NullArgumentException(); + } + + final double[] targetValues = getTarget(); + final int nR = targetValues.length; // Number of observed data. + + final RealMatrix weightMatrix = getWeight(); + // Diagonal of the weight matrix. 
+ final double[] residualsWeights = new double[nR]; + for (int i = 0; i < nR; i++) { + residualsWeights[i] = weightMatrix.getEntry(i, i); + } + + final double[] currentPoint = getStartPoint(); + final int nC = currentPoint.length; + + // iterate until convergence is reached + PointVectorValuePair current = null; + for (boolean converged = false; !converged;) { + incrementIterationCount(); + + // evaluate the objective function and its jacobian + PointVectorValuePair previous = current; + // Value of the objective function at "currentPoint". + final double[] currentObjective = computeObjectiveValue(currentPoint); + final double[] currentResiduals = computeResiduals(currentObjective); + final RealMatrix weightedJacobian = computeWeightedJacobian(currentPoint); + current = new PointVectorValuePair(currentPoint, currentObjective); + + // build the linear problem + final double[] b = new double[nC]; + final double[][] a = new double[nC][nC]; + for (int i = 0; i < nR; ++i) { + + final double[] grad = weightedJacobian.getRow(i); + final double weight = residualsWeights[i]; + final double residual = currentResiduals[i]; + + // compute the normal equation + final double wr = weight * residual; + for (int j = 0; j < nC; ++j) { + b[j] += wr * grad[j]; + } + + // build the contribution matrix for measurement i + for (int k = 0; k < nC; ++k) { + double[] ak = a[k]; + double wgk = weight * grad[k]; + for (int l = 0; l < nC; ++l) { + ak[l] += wgk * grad[l]; + } + } + } + + // Check convergence. + if (previous != null) { + converged = checker.converged(getIterations(), previous, current); + if (converged) { + setCost(computeCost(currentResiduals)); + return current; + } + } + + try { + // solve the linearized least squares problem + RealMatrix mA = new BlockRealMatrix(a); + DecompositionSolver solver = useLU ? 
+ new LUDecomposition(mA).getSolver() : + new QRDecomposition(mA).getSolver(); + final double[] dX = solver.solve(new ArrayRealVector(b, false)).toArray(); + // update the estimated parameters + for (int i = 0; i < nC; ++i) { + currentPoint[i] += dX[i]; + } + } catch (SingularMatrixException e) { + throw new ConvergenceException(LocalizedFormats.UNABLE_TO_SOLVE_SINGULAR_PROBLEM); + } + } + // Must never happen. + throw new MathInternalError(); + } + + /** + * @throws MathUnsupportedOperationException if bounds were passed to the + * {@link #optimize(OptimizationData[]) optimize} method. + */ + private void checkParameters() { + if (getLowerBound() != null || + getUpperBound() != null) { + throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/LevenbergMarquardtOptimizer.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/LevenbergMarquardtOptimizer.java new file mode 100644 index 0000000..05be0d0 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/LevenbergMarquardtOptimizer.java @@ -0,0 +1,961 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.nonlinear.vector.jacobian; + +import java.util.Arrays; +import org.apache.commons.math3.exception.ConvergenceException; +import org.apache.commons.math3.exception.MathUnsupportedOperationException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.optim.PointVectorValuePair; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.linear.RealMatrix; +import org.apache.commons.math3.util.Precision; +import org.apache.commons.math3.util.FastMath; + + +/** + * This class solves a least-squares problem using the Levenberg-Marquardt + * algorithm. + * <br/> + * Constraints are not supported: the call to + * {@link #optimize(OptimizationData[]) optimize} will throw + * {@link MathUnsupportedOperationException} if bounds are passed to it. + * + * <p>This implementation <em>should</em> work even for over-determined systems + * (i.e. systems having more points than equations). Over-determined systems + * are solved by ignoring the points which have the smallest impact according + * to their jacobian column norm. Only the rank of the matrix and some loop bounds + * are changed to implement this.</p> + * + * <p>The resolution engine is a simple translation of the MINPACK <a + * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor + * changes. The changes include the over-determined resolution, the use of + * inherited convergence checker and the Q.R. decomposition which has been + * rewritten following the algorithm described in the + * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle + * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> + * <p>The authors of the original fortran version are: + * <ul> + * <li>Argonne National Laboratory. MINPACK project. March 1980</li> + * <li>Burton S. Garbow</li> + * <li>Kenneth E. Hillstrom</li> + * <li>Jorge J.
More</li> + * </ul> + * The redistribution policy for MINPACK is available <a + * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it + * is reproduced below.</p> + * + * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> + * <tr><td> + * Minpack Copyright Notice (1999) University of Chicago. + * All rights reserved + * </td></tr> + * <tr><td> + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * <ol> + * <li>Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.</li> + * <li>Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution.</li> + * <li>The end-user documentation included with the redistribution, if any, + * must include the following acknowledgment: + * <code>This product includes software developed by the University of + * Chicago, as Operator of Argonne National Laboratory.</code> + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear.</li> + * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" + * WITHOUT WARRANTY OF ANY KIND. 
THE COPYRIGHT HOLDER, THE + * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND + * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE + * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY + * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR + * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF + * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) + * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION + * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL + * BE CORRECTED.</strong></li> + * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT + * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF + * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, + * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF + * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF + * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER + * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT + * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, + * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE + * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> + * </ol></td></tr> + * </table> + * + * @since 2.0 + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +@Deprecated +public class LevenbergMarquardtOptimizer + extends AbstractLeastSquaresOptimizer { + /** Twice the "epsilon machine". */ + private static final double TWO_EPS = 2 * Precision.EPSILON; + /** Number of solved point. */ + private int solvedCols; + /** Diagonal elements of the R matrix in the Q.R. decomposition. */ + private double[] diagR; + /** Norms of the columns of the jacobian matrix.
*/ + private double[] jacNorm; + /** Coefficients of the Householder transforms vectors. */ + private double[] beta; + /** Columns permutation array. */ + private int[] permutation; + /** Rank of the jacobian matrix. */ + private int rank; + /** Levenberg-Marquardt parameter. */ + private double lmPar; + /** Parameters evolution direction associated with lmPar. */ + private double[] lmDir; + /** Positive input variable used in determining the initial step bound. */ + private final double initialStepBoundFactor; + /** Desired relative error in the sum of squares. */ + private final double costRelativeTolerance; + /** Desired relative error in the approximate solution parameters. */ + private final double parRelativeTolerance; + /** Desired max cosine on the orthogonality between the function vector + * and the columns of the jacobian. */ + private final double orthoTolerance; + /** Threshold for QR ranking. */ + private final double qrRankingThreshold; + /** Weighted residuals. */ + private double[] weightedResidual; + /** Weighted Jacobian. */ + private double[][] weightedJacobian; + + /** + * Build an optimizer for least squares problems with default values + * for all the tuning parameters (see the {@link + * #LevenbergMarquardtOptimizer(double,double,double,double,double) + * other constructor}). + * The default values for the algorithm settings are: + * <ul> + * <li>Initial step bound factor: 100</li> + * <li>Cost relative tolerance: 1e-10</li> + * <li>Parameters relative tolerance: 1e-10</li> + * <li>Orthogonality tolerance: 1e-10</li> + * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> + * </ul> + */ + public LevenbergMarquardtOptimizer() { + this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); + } + + /** + * Constructor that allows the specification of a custom convergence + * checker. + * Note that the usual convergence checks remain <em>enabled</em>; the custom + * checker is applied in addition to them.
+ * The default values for the algorithm settings are: + * <ul> + * <li>Initial step bound factor: 100</li> + * <li>Cost relative tolerance: 1e-10</li> + * <li>Parameters relative tolerance: 1e-10</li> + * <li>Orthogonality tolerance: 1e-10</li> + * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> + * </ul> + * + * @param checker Convergence checker. + */ + public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { + this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); + } + + /** + * Constructor that allows the specification of a custom convergence + * checker, in addition to the standard ones. + * + * @param initialStepBoundFactor Positive input variable used in + * determining the initial step bound. This bound is set to the + * product of initialStepBoundFactor and the euclidean norm of + * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} + * itself. In most cases factor should lie in the interval + * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. + * @param checker Convergence checker. + * @param costRelativeTolerance Desired relative error in the sum of + * squares. + * @param parRelativeTolerance Desired relative error in the approximate + * solution parameters. + * @param orthoTolerance Desired max cosine on the orthogonality between + * the function vector and the columns of the Jacobian. + * @param threshold Desired threshold for QR ranking. If the squared norm + * of a column vector is smaller or equal to this threshold during QR + * decomposition, it is considered to be a zero vector and hence the rank + * of the matrix is reduced. 
+ */ + public LevenbergMarquardtOptimizer(double initialStepBoundFactor, + ConvergenceChecker<PointVectorValuePair> checker, + double costRelativeTolerance, + double parRelativeTolerance, + double orthoTolerance, + double threshold) { + super(checker); + this.initialStepBoundFactor = initialStepBoundFactor; + this.costRelativeTolerance = costRelativeTolerance; + this.parRelativeTolerance = parRelativeTolerance; + this.orthoTolerance = orthoTolerance; + this.qrRankingThreshold = threshold; + } + + /** + * Build an optimizer for least squares problems with default values + * for some of the tuning parameters (see the {@link + * #LevenbergMarquardtOptimizer(double,double,double,double,double) + * other constructor}). + * The default values for the algorithm settings are: + * <ul> + * <li>Initial step bound factor: 100</li> + * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> + * </ul> + * + * @param costRelativeTolerance Desired relative error in the sum of + * squares. + * @param parRelativeTolerance Desired relative error in the approximate + * solution parameters. + * @param orthoTolerance Desired max cosine on the orthogonality between + * the function vector and the columns of the Jacobian. + */ + public LevenbergMarquardtOptimizer(double costRelativeTolerance, + double parRelativeTolerance, + double orthoTolerance) { + this(100, + costRelativeTolerance, parRelativeTolerance, orthoTolerance, + Precision.SAFE_MIN); + } + + /** + * The arguments control the behaviour of the default convergence checking + * procedure. + * Additional criteria can be defined through the setting of a {@link + * ConvergenceChecker}. + * + * @param initialStepBoundFactor Positive input variable used in + * determining the initial step bound. This bound is set to the + * product of initialStepBoundFactor and the euclidean norm of + * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} + * itself.
In most cases factor should lie in the interval + * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. + * @param costRelativeTolerance Desired relative error in the sum of + * squares. + * @param parRelativeTolerance Desired relative error in the approximate + * solution parameters. + * @param orthoTolerance Desired max cosine on the orthogonality between + * the function vector and the columns of the Jacobian. + * @param threshold Desired threshold for QR ranking. If the squared norm + * of a column vector is smaller or equal to this threshold during QR + * decomposition, it is considered to be a zero vector and hence the rank + * of the matrix is reduced. + */ + public LevenbergMarquardtOptimizer(double initialStepBoundFactor, + double costRelativeTolerance, + double parRelativeTolerance, + double orthoTolerance, + double threshold) { + super(null); // No custom convergence criterion. + this.initialStepBoundFactor = initialStepBoundFactor; + this.costRelativeTolerance = costRelativeTolerance; + this.parRelativeTolerance = parRelativeTolerance; + this.orthoTolerance = orthoTolerance; + this.qrRankingThreshold = threshold; + } + + /** {@inheritDoc} */ + @Override + protected PointVectorValuePair doOptimize() { + checkParameters(); + + final int nR = getTarget().length; // Number of observed data. + final double[] currentPoint = getStartPoint(); + final int nC = currentPoint.length; // Number of parameters. 
+ + // arrays shared with the other private methods + solvedCols = FastMath.min(nR, nC); + diagR = new double[nC]; + jacNorm = new double[nC]; + beta = new double[nC]; + permutation = new int[nC]; + lmDir = new double[nC]; + + // local point + double delta = 0; + double xNorm = 0; + double[] diag = new double[nC]; + double[] oldX = new double[nC]; + double[] oldRes = new double[nR]; + double[] oldObj = new double[nR]; + double[] qtf = new double[nR]; + double[] work1 = new double[nC]; + double[] work2 = new double[nC]; + double[] work3 = new double[nC]; + + final RealMatrix weightMatrixSqrt = getWeightSquareRoot(); + + // Evaluate the function at the starting point and calculate its norm. + double[] currentObjective = computeObjectiveValue(currentPoint); + double[] currentResiduals = computeResiduals(currentObjective); + PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective); + double currentCost = computeCost(currentResiduals); + + // Outer loop. + lmPar = 0; + boolean firstIteration = true; + final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker(); + while (true) { + incrementIterationCount(); + + final PointVectorValuePair previous = current; + + // QR decomposition of the jacobian matrix + qrDecomposition(computeWeightedJacobian(currentPoint)); + + weightedResidual = weightMatrixSqrt.operate(currentResiduals); + for (int i = 0; i < nR; i++) { + qtf[i] = weightedResidual[i]; + } + + // compute Qt.res + qTy(qtf); + + // now we don't need Q anymore, + // so let jacobian contain the R matrix with its diagonal elements + for (int k = 0; k < solvedCols; ++k) { + int pk = permutation[k]; + weightedJacobian[k][pk] = diagR[pk]; + } + + if (firstIteration) { + // scale the point according to the norms of the columns + // of the initial jacobian + xNorm = 0; + for (int k = 0; k < nC; ++k) { + double dk = jacNorm[k]; + if (dk == 0) { + dk = 1.0; + } + double xk = dk * currentPoint[k]; + xNorm += xk * xk; + diag[k] = 
dk; + } + xNorm = FastMath.sqrt(xNorm); + + // initialize the step bound delta + delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); + } + + // check orthogonality between function vector and jacobian columns + double maxCosine = 0; + if (currentCost != 0) { + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double s = jacNorm[pj]; + if (s != 0) { + double sum = 0; + for (int i = 0; i <= j; ++i) { + sum += weightedJacobian[i][pj] * qtf[i]; + } + maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); + } + } + } + if (maxCosine <= orthoTolerance) { + // Convergence has been reached. + setCost(currentCost); + return current; + } + + // rescale if necessary + for (int j = 0; j < nC; ++j) { + diag[j] = FastMath.max(diag[j], jacNorm[j]); + } + + // Inner loop. + for (double ratio = 0; ratio < 1.0e-4;) { + + // save the state + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + oldX[pj] = currentPoint[pj]; + } + final double previousCost = currentCost; + double[] tmpVec = weightedResidual; + weightedResidual = oldRes; + oldRes = tmpVec; + tmpVec = currentObjective; + currentObjective = oldObj; + oldObj = tmpVec; + + // determine the Levenberg-Marquardt parameter + determineLMParameter(qtf, delta, diag, work1, work2, work3); + + // compute the new point and the norm of the evolution direction + double lmNorm = 0; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + lmDir[pj] = -lmDir[pj]; + currentPoint[pj] = oldX[pj] + lmDir[pj]; + double s = diag[pj] * lmDir[pj]; + lmNorm += s * s; + } + lmNorm = FastMath.sqrt(lmNorm); + // on the first iteration, adjust the initial step bound. + if (firstIteration) { + delta = FastMath.min(delta, lmNorm); + } + + // Evaluate the function at x + p and calculate its norm. 
+ currentObjective = computeObjectiveValue(currentPoint); + currentResiduals = computeResiduals(currentObjective); + current = new PointVectorValuePair(currentPoint, currentObjective); + currentCost = computeCost(currentResiduals); + + // compute the scaled actual reduction + double actRed = -1.0; + if (0.1 * currentCost < previousCost) { + double r = currentCost / previousCost; + actRed = 1.0 - r * r; + } + + // compute the scaled predicted reduction + // and the scaled directional derivative + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double dirJ = lmDir[pj]; + work1[j] = 0; + for (int i = 0; i <= j; ++i) { + work1[i] += weightedJacobian[i][pj] * dirJ; + } + } + double coeff1 = 0; + for (int j = 0; j < solvedCols; ++j) { + coeff1 += work1[j] * work1[j]; + } + double pc2 = previousCost * previousCost; + coeff1 /= pc2; + double coeff2 = lmPar * lmNorm * lmNorm / pc2; + double preRed = coeff1 + 2 * coeff2; + double dirDer = -(coeff1 + coeff2); + + // ratio of the actual to the predicted reduction + ratio = (preRed == 0) ? 0 : (actRed / preRed); + + // update the step bound + if (ratio <= 0.25) { + double tmp = + (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; + if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { + tmp = 0.1; + } + delta = tmp * FastMath.min(delta, 10.0 * lmNorm); + lmPar /= tmp; + } else if ((lmPar == 0) || (ratio >= 0.75)) { + delta = 2 * lmNorm; + lmPar *= 0.5; + } + + // test for successful iteration. + if (ratio >= 1.0e-4) { + // successful iteration, update the norm + firstIteration = false; + xNorm = 0; + for (int k = 0; k < nC; ++k) { + double xK = diag[k] * currentPoint[k]; + xNorm += xK * xK; + } + xNorm = FastMath.sqrt(xNorm); + + // tests for convergence. 
+ if (checker != null && checker.converged(getIterations(), previous, current)) { + setCost(currentCost); + return current; + } + } else { + // failed iteration, reset the previous values + currentCost = previousCost; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + currentPoint[pj] = oldX[pj]; + } + tmpVec = weightedResidual; + weightedResidual = oldRes; + oldRes = tmpVec; + tmpVec = currentObjective; + currentObjective = oldObj; + oldObj = tmpVec; + // Reset "current" to previous values. + current = new PointVectorValuePair(currentPoint, currentObjective); + } + + // Default convergence criteria. + if ((FastMath.abs(actRed) <= costRelativeTolerance && + preRed <= costRelativeTolerance && + ratio <= 2.0) || + delta <= parRelativeTolerance * xNorm) { + setCost(currentCost); + return current; + } + + // tests for termination and stringent tolerances + if (FastMath.abs(actRed) <= TWO_EPS && + preRed <= TWO_EPS && + ratio <= 2.0) { + throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, + costRelativeTolerance); + } else if (delta <= TWO_EPS * xNorm) { + throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, + parRelativeTolerance); + } else if (maxCosine <= TWO_EPS) { + throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, + orthoTolerance); + } + } + } + } + + /** + * Determine the Levenberg-Marquardt parameter. + * <p>This implementation is a translation in Java of the MINPACK + * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> + * routine.</p> + * <p>This method sets the lmPar and lmDir attributes.</p> + * <p>The authors of the original fortran function are:</p> + * <ul> + * <li>Argonne National Laboratory. MINPACK project. March 1980</li> + * <li>Burton S. Garbow</li> + * <li>Kenneth E. Hillstrom</li> + * <li>Jorge J. 
More</li> + * </ul> + * <p>Luc Maisonobe did the Java translation.</p> + * + * @param qy array containing qTy + * @param delta upper bound on the euclidean norm of diagR * lmDir + * @param diag diagonal matrix + * @param work1 work array + * @param work2 work array + * @param work3 work array + */ + private void determineLMParameter(double[] qy, double delta, double[] diag, + double[] work1, double[] work2, double[] work3) { + final int nC = weightedJacobian[0].length; + + // compute and store in x the gauss-newton direction, if the + // jacobian is rank-deficient, obtain a least squares solution + for (int j = 0; j < rank; ++j) { + lmDir[permutation[j]] = qy[j]; + } + for (int j = rank; j < nC; ++j) { + lmDir[permutation[j]] = 0; + } + for (int k = rank - 1; k >= 0; --k) { + int pk = permutation[k]; + double ypk = lmDir[pk] / diagR[pk]; + for (int i = 0; i < k; ++i) { + lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; + } + lmDir[pk] = ypk; + } + + // evaluate the function at the origin, and test + // for acceptance of the Gauss-Newton direction + double dxNorm = 0; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double s = diag[pj] * lmDir[pj]; + work1[pj] = s; + dxNorm += s * s; + } + dxNorm = FastMath.sqrt(dxNorm); + double fp = dxNorm - delta; + if (fp <= 0.1 * delta) { + lmPar = 0; + return; + } + + // if the jacobian is not rank deficient, the Newton step provides + // a lower bound, parl, for the zero of the function, + // otherwise set this bound to zero + double sum2; + double parl = 0; + if (rank == solvedCols) { + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + work1[pj] *= diag[pj] / dxNorm; + } + sum2 = 0; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double sum = 0; + for (int i = 0; i < j; ++i) { + sum += weightedJacobian[i][pj] * work1[permutation[i]]; + } + double s = (work1[pj] - sum) / diagR[pj]; + work1[pj] = s; + sum2 += s * s; + } + parl = fp / (delta * sum2); + } + + // 
calculate an upper bound, paru, for the zero of the function + sum2 = 0; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double sum = 0; + for (int i = 0; i <= j; ++i) { + sum += weightedJacobian[i][pj] * qy[i]; + } + sum /= diag[pj]; + sum2 += sum * sum; + } + double gNorm = FastMath.sqrt(sum2); + double paru = gNorm / delta; + if (paru == 0) { + paru = Precision.SAFE_MIN / FastMath.min(delta, 0.1); + } + + // if the input par lies outside of the interval (parl,paru), + // set par to the closer endpoint + lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); + if (lmPar == 0) { + lmPar = gNorm / dxNorm; + } + + for (int countdown = 10; countdown >= 0; --countdown) { + + // evaluate the function at the current value of lmPar + if (lmPar == 0) { + lmPar = FastMath.max(Precision.SAFE_MIN, 0.001 * paru); + } + double sPar = FastMath.sqrt(lmPar); + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + work1[pj] = sPar * diag[pj]; + } + determineLMDirection(qy, work1, work2, work3); + + dxNorm = 0; + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + double s = diag[pj] * lmDir[pj]; + work3[pj] = s; + dxNorm += s * s; + } + dxNorm = FastMath.sqrt(dxNorm); + double previousFP = fp; + fp = dxNorm - delta; + + // if the function is small enough, accept the current value + // of lmPar, also test for the exceptional cases where parl is zero + if ((FastMath.abs(fp) <= 0.1 * delta) || + ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { + return; + } + + // compute the Newton correction + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + work1[pj] = work3[pj] * diag[pj] / dxNorm; + } + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + work1[pj] /= work2[j]; + double tmp = work1[pj]; + for (int i = j + 1; i < solvedCols; ++i) { + work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; + } + } + sum2 = 0; + for (int j = 0; j < solvedCols; ++j) { + double s = work1[permutation[j]]; + 
sum2 += s * s; + } + double correction = fp / (delta * sum2); + + // depending on the sign of the function, update parl or paru. + if (fp > 0) { + parl = FastMath.max(parl, lmPar); + } else if (fp < 0) { + paru = FastMath.min(paru, lmPar); + } + + // compute an improved estimate for lmPar + lmPar = FastMath.max(parl, lmPar + correction); + + } + } + + /** + * Solve a*x = b and d*x = 0 in the least squares sense. + * <p>This implementation is a translation in Java of the MINPACK + * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> + * routine.</p> + * <p>This method sets the lmDir and lmDiag attributes.</p> + * <p>The authors of the original fortran function are:</p> + * <ul> + * <li>Argonne National Laboratory. MINPACK project. March 1980</li> + * <li>Burton S. Garbow</li> + * <li>Kenneth E. Hillstrom</li> + * <li>Jorge J. More</li> + * </ul> + * <p>Luc Maisonobe did the Java translation.</p> + * + * @param qy array containing qTy + * @param diag diagonal matrix + * @param lmDiag diagonal elements associated with lmDir + * @param work work array + */ + private void determineLMDirection(double[] qy, double[] diag, + double[] lmDiag, double[] work) { + + // copy R and Qty to preserve input and initialize s + // in particular, save the diagonal elements of R in lmDir + for (int j = 0; j < solvedCols; ++j) { + int pj = permutation[j]; + for (int i = j + 1; i < solvedCols; ++i) { + weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; + } + lmDir[j] = diagR[pj]; + work[j] = qy[j]; + } + + // eliminate the diagonal matrix d using a Givens rotation + for (int j = 0; j < solvedCols; ++j) { + + // prepare the row of d to be eliminated, locating the + // diagonal element using p from the Q.R. 
factorization + int pj = permutation[j]; + double dpj = diag[pj]; + if (dpj != 0) { + Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); + } + lmDiag[j] = dpj; + + // the transformations to eliminate the row of d + // modify only a single element of Qty + // beyond the first n, which is initially zero. + double qtbpj = 0; + for (int k = j; k < solvedCols; ++k) { + int pk = permutation[k]; + + // determine a Givens rotation which eliminates the + // appropriate element in the current row of d + if (lmDiag[k] != 0) { + + final double sin; + final double cos; + double rkk = weightedJacobian[k][pk]; + if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { + final double cotan = rkk / lmDiag[k]; + sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); + cos = sin * cotan; + } else { + final double tan = lmDiag[k] / rkk; + cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); + sin = cos * tan; + } + + // compute the modified diagonal element of R and + // the modified element of (Qty,0) + weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; + final double temp = cos * work[k] + sin * qtbpj; + qtbpj = -sin * work[k] + cos * qtbpj; + work[k] = temp; + + // accumulate the tranformation in the row of s + for (int i = k + 1; i < solvedCols; ++i) { + double rik = weightedJacobian[i][pk]; + final double temp2 = cos * rik + sin * lmDiag[i]; + lmDiag[i] = -sin * rik + cos * lmDiag[i]; + weightedJacobian[i][pk] = temp2; + } + } + } + + // store the diagonal element of s and restore + // the corresponding diagonal element of R + lmDiag[j] = weightedJacobian[j][permutation[j]]; + weightedJacobian[j][permutation[j]] = lmDir[j]; + } + + // solve the triangular system for z, if the system is + // singular, then obtain a least squares solution + int nSing = solvedCols; + for (int j = 0; j < solvedCols; ++j) { + if ((lmDiag[j] == 0) && (nSing == solvedCols)) { + nSing = j; + } + if (nSing < solvedCols) { + work[j] = 0; + } + } + if (nSing > 0) { + for (int j = nSing - 1; j >= 0; --j) { + int pj = 
permutation[j]; + double sum = 0; + for (int i = j + 1; i < nSing; ++i) { + sum += weightedJacobian[i][pj] * work[i]; + } + work[j] = (work[j] - sum) / lmDiag[j]; + } + } + + // permute the components of z back to components of lmDir + for (int j = 0; j < lmDir.length; ++j) { + lmDir[permutation[j]] = work[j]; + } + } + + /** + * Decompose a matrix A as A.P = Q.R using Householder transforms. + * <p>As suggested in the P. Lascaux and R. Theodor book + * <i>Analyse numérique matricielle appliquée à + * l'art de l'ingénieur</i> (Masson, 1986), instead of representing + * the Householder transforms with u<sub>k</sub> unit vectors such that: + * <pre> + * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> + * </pre> + * we use v<sub>k</sub> non-unit vectors such that: + * <pre> + * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> + * </pre> + * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. + * The beta<sub>k</sub> coefficients are provided upon exit as recomputing + * them from the v<sub>k</sub> vectors would be costly.</p> + * <p>This decomposition handles rank deficient cases since the transformations + * are performed in non-increasing columns norms order thanks to columns + * pivoting. The diagonal elements of the R matrix are therefore also in + * non-increasing absolute values order.</p> + * + * @param jacobian Weighted Jacobian matrix at the current point. + * @exception ConvergenceException if the decomposition cannot be performed + */ + private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException { + // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), + // hence the multiplication by -1.
+ weightedJacobian = jacobian.scalarMultiply(-1).getData(); + + final int nR = weightedJacobian.length; + final int nC = weightedJacobian[0].length; + + // initializations + for (int k = 0; k < nC; ++k) { + permutation[k] = k; + double norm2 = 0; + for (int i = 0; i < nR; ++i) { + double akk = weightedJacobian[i][k]; + norm2 += akk * akk; + } + jacNorm[k] = FastMath.sqrt(norm2); + } + + // transform the matrix column after column + for (int k = 0; k < nC; ++k) { + + // select the column with the greatest norm on active components + int nextColumn = -1; + double ak2 = Double.NEGATIVE_INFINITY; + for (int i = k; i < nC; ++i) { + double norm2 = 0; + for (int j = k; j < nR; ++j) { + double aki = weightedJacobian[j][permutation[i]]; + norm2 += aki * aki; + } + if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { + throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, + nR, nC); + } + if (norm2 > ak2) { + nextColumn = i; + ak2 = norm2; + } + } + if (ak2 <= qrRankingThreshold) { + rank = k; + return; + } + int pk = permutation[nextColumn]; + permutation[nextColumn] = permutation[k]; + permutation[k] = pk; + + // choose alpha such that Hk.u = alpha ek + double akk = weightedJacobian[k][pk]; + double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); + double betak = 1.0 / (ak2 - akk * alpha); + beta[pk] = betak; + + // transform the current column + diagR[pk] = alpha; + weightedJacobian[k][pk] -= alpha; + + // transform the remaining columns + for (int dk = nC - 1 - k; dk > 0; --dk) { + double gamma = 0; + for (int j = k; j < nR; ++j) { + gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; + } + gamma *= betak; + for (int j = k; j < nR; ++j) { + weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; + } + } + } + rank = solvedCols; + } + + /** + * Compute the product Qt.y for some Q.R. decomposition. 
+ * + * @param y vector to multiply (will be overwritten with the result) + */ + private void qTy(double[] y) { + final int nR = weightedJacobian.length; + final int nC = weightedJacobian[0].length; + + for (int k = 0; k < nC; ++k) { + int pk = permutation[k]; + double gamma = 0; + for (int i = k; i < nR; ++i) { + gamma += weightedJacobian[i][pk] * y[i]; + } + gamma *= beta[pk]; + for (int i = k; i < nR; ++i) { + y[i] -= gamma * weightedJacobian[i][pk]; + } + } + } + + /** + * @throws MathUnsupportedOperationException if bounds were passed to the + * {@link #optimize(OptimizationData[]) optimize} method. + */ + private void checkParameters() { + if (getLowerBound() != null || + getUpperBound() != null) { + throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT); + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/package-info.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/package-info.java new file mode 100644 index 0000000..4c844ba --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/jacobian/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * This package provides optimization algorithms that require derivatives. + * + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). + */ +package org.apache.commons.math3.optim.nonlinear.vector.jacobian; diff --git a/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/package-info.java b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/package-info.java new file mode 100644 index 0000000..439fc3c --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/nonlinear/vector/package-info.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Algorithms for optimizing a vector function. + * + * @deprecated All classes and interfaces in this package are deprecated. + * The optimizers that were provided here were moved to the + * {@link org.apache.commons.math3.fitting.leastsquares} package + * (cf. MATH-1008). 
+ */ +package org.apache.commons.math3.optim.nonlinear.vector; diff --git a/src/main/java/org/apache/commons/math3/optim/package-info.java b/src/main/java/org/apache/commons/math3/optim/package-info.java new file mode 100644 index 0000000..e2f3c9f --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/package-info.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Generally, optimizers are algorithms that will either {@link + * org.apache.commons.math3.optim.nonlinear.scalar.GoalType#MINIMIZE minimize} or {@link + * org.apache.commons.math3.optim.nonlinear.scalar.GoalType#MAXIMIZE maximize} a scalar function, + * called the {@link org.apache.commons.math3.optim.nonlinear.scalar.ObjectiveFunction <em>objective + * function</em>}. <br> + * For some scalar objective functions the gradient can be computed (analytically or numerically). + * Algorithms that use this knowledge are defined in the {@link + * org.apache.commons.math3.optim.nonlinear.scalar.gradient} package. The algorithms that do not + * need this additional information are located in the {@link + * org.apache.commons.math3.optim.nonlinear.scalar.noderiv} package. 
+ * + * <p>Some problems are solved more efficiently by algorithms that, instead of an objective + * function, need access to a {@link org.apache.commons.math3.optim.nonlinear.vector.ModelFunction + * <em>model function</em>}: such a model predicts a set of values which the algorithm tries to + * match with a set of given {@link org.apache.commons.math3.optim.nonlinear.vector.Target target + * values}. Those algorithms are located in the {@link + * org.apache.commons.math3.optim.nonlinear.vector} package. <br> + * Algorithms that also require the {@link + * org.apache.commons.math3.optim.nonlinear.vector.ModelFunctionJacobian Jacobian matrix of the + * model} are located in the {@link org.apache.commons.math3.optim.nonlinear.vector.jacobian} + * package. <br> + * The {@link org.apache.commons.math3.optim.nonlinear.vector.jacobian.AbstractLeastSquaresOptimizer + * non-linear least-squares optimizers} are a specialization of the latter, that minimize the + * distance (called <em>cost</em> or <em>χ<sup>2</sup></em>) between model and observations. + * <br> + * For cases where the Jacobian cannot be provided, a utility class will {@link + * org.apache.commons.math3.optim.nonlinear.scalar.LeastSquaresConverter convert} a (vector) model + * into a (scalar) objective function. + * + * <p>This package provides common functionality for the optimization algorithms. Abstract classes + * ({@link org.apache.commons.math3.optim.BaseOptimizer} and {@link + * org.apache.commons.math3.optim.BaseMultivariateOptimizer}) contain boiler-plate code for storing + * {@link org.apache.commons.math3.optim.MaxEval evaluations} and {@link + * org.apache.commons.math3.optim.MaxIter iterations} counters and a user-defined {@link + * org.apache.commons.math3.optim.ConvergenceChecker convergence checker}. 
+ * + * <p>For each of the optimizer types, there is a special implementation that wraps an optimizer + * instance and provides a "multi-start" feature: it calls the underlying optimizer several times + * with different starting points and returns the best optimum found, or all optima if so desired. + * This could be useful to avoid being trapped in a local extremum. + */ +package org.apache.commons.math3.optim; diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/BracketFinder.java b/src/main/java/org/apache/commons/math3/optim/univariate/BracketFinder.java new file mode 100644 index 0000000..6d42c0d --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/BracketFinder.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.univariate; + +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.util.IntegerSequence; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.TooManyEvaluationsException; +import org.apache.commons.math3.exception.MaxCountExceededException; +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; + +/** + * Provide an interval that brackets a local optimum of a function. + * This code is based on a Python implementation (from <em>SciPy</em>, + * module {@code optimize.py} v0.5). + * + * @since 2.2 + */ +public class BracketFinder { + /** Tolerance to avoid division by zero. */ + private static final double EPS_MIN = 1e-21; + /** + * Golden section. + */ + private static final double GOLD = 1.618034; + /** + * Factor for expanding the interval. + */ + private final double growLimit; + /** + * Counter for function evaluations. + */ + private IntegerSequence.Incrementor evaluations; + /** + * Lower bound of the bracket. + */ + private double lo; + /** + * Higher bound of the bracket. + */ + private double hi; + /** + * Point inside the bracket. + */ + private double mid; + /** + * Function value at {@link #lo}. + */ + private double fLo; + /** + * Function value at {@link #hi}. + */ + private double fHi; + /** + * Function value at {@link #mid}. + */ + private double fMid; + + /** + * Constructor with default values {@code 100, 500} (see the + * {@link #BracketFinder(double,int) other constructor}). + */ + public BracketFinder() { + this(100, 500); + } + + /** + * Create a bracketing interval finder. + * + * @param growLimit Expanding factor. + * @param maxEvaluations Maximum number of evaluations allowed for finding + * a bracketing interval. 
+ */ + public BracketFinder(double growLimit, + int maxEvaluations) { + if (growLimit <= 0) { + throw new NotStrictlyPositiveException(growLimit); + } + if (maxEvaluations <= 0) { + throw new NotStrictlyPositiveException(maxEvaluations); + } + + this.growLimit = growLimit; + evaluations = IntegerSequence.Incrementor.create().withMaximalCount(maxEvaluations); + } + + /** + * Search new points that bracket a local optimum of the function. + * + * @param func Function whose optimum should be bracketed. + * @param goal {@link GoalType Goal type}. + * @param xA Initial point. + * @param xB Initial point. + * @throws TooManyEvaluationsException if the maximum number of evaluations + * is exceeded. + */ + public void search(UnivariateFunction func, + GoalType goal, + double xA, + double xB) { + evaluations = evaluations.withStart(0); + final boolean isMinim = goal == GoalType.MINIMIZE; + + double fA = eval(func, xA); + double fB = eval(func, xB); + if (isMinim ? + fA < fB : + fA > fB) { + + double tmp = xA; + xA = xB; + xB = tmp; + + tmp = fA; + fA = fB; + fB = tmp; + } + + double xC = xB + GOLD * (xB - xA); + double fC = eval(func, xC); + + while (isMinim ? fC < fB : fC > fB) { + double tmp1 = (xB - xA) * (fB - fC); + double tmp2 = (xB - xC) * (fB - fA); + + double val = tmp2 - tmp1; + double denom = FastMath.abs(val) < EPS_MIN ? 2 * EPS_MIN : 2 * val; + + double w = xB - ((xB - xC) * tmp2 - (xB - xA) * tmp1) / denom; + double wLim = xB + growLimit * (xC - xB); + + double fW; + if ((w - xC) * (xB - w) > 0) { + fW = eval(func, w); + if (isMinim ? + fW < fC : + fW > fC) { + xA = xB; + xB = w; + fA = fB; + fB = fW; + break; + } else if (isMinim ? + fW > fB : + fW < fB) { + xC = w; + fC = fW; + break; + } + w = xC + GOLD * (xC - xB); + fW = eval(func, w); + } else if ((w - wLim) * (wLim - xC) >= 0) { + w = wLim; + fW = eval(func, w); + } else if ((w - wLim) * (xC - w) > 0) { + fW = eval(func, w); + if (isMinim ? 
+ fW < fC : + fW > fC) { + xB = xC; + xC = w; + w = xC + GOLD * (xC - xB); + fB = fC; + fC =fW; + fW = eval(func, w); + } + } else { + w = xC + GOLD * (xC - xB); + fW = eval(func, w); + } + + xA = xB; + fA = fB; + xB = xC; + fB = fC; + xC = w; + fC = fW; + } + + lo = xA; + fLo = fA; + mid = xB; + fMid = fB; + hi = xC; + fHi = fC; + + if (lo > hi) { + double tmp = lo; + lo = hi; + hi = tmp; + + tmp = fLo; + fLo = fHi; + fHi = tmp; + } + } + + /** + * @return the maximal number of evaluations allowed. + */ + public int getMaxEvaluations() { + return evaluations.getMaximalCount(); + } + + /** + * @return the number of evaluations. + */ + public int getEvaluations() { + return evaluations.getCount(); + } + + /** + * @return the lower bound of the bracket. + * @see #getFLo() + */ + public double getLo() { + return lo; + } + + /** + * Get function value at {@link #getLo()}. + * @return function value at {@link #getLo()} + */ + public double getFLo() { + return fLo; + } + + /** + * @return the higher bound of the bracket. + * @see #getFHi() + */ + public double getHi() { + return hi; + } + + /** + * Get function value at {@link #getHi()}. + * @return function value at {@link #getHi()} + */ + public double getFHi() { + return fHi; + } + + /** + * @return a point inside the bracket. + * @see #getFMid() + */ + public double getMid() { + return mid; + } + + /** + * Get function value at {@link #getMid()}. + * @return function value at {@link #getMid()} + */ + public double getFMid() { + return fMid; + } + + /** + * @param f Function. + * @param x Argument. + * @return {@code f(x)} + * @throws TooManyEvaluationsException if the maximal number of evaluations is + * exceeded. 
+ */ + private double eval(UnivariateFunction f, double x) { + try { + evaluations.increment(); + } catch (MaxCountExceededException e) { + throw new TooManyEvaluationsException(e.getMax()); + } + return f.value(x); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/BrentOptimizer.java b/src/main/java/org/apache/commons/math3/optim/univariate/BrentOptimizer.java new file mode 100644 index 0000000..d783405 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/BrentOptimizer.java @@ -0,0 +1,314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.univariate; + +import org.apache.commons.math3.util.Precision; +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.exception.NumberIsTooSmallException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; + +/** + * For a function defined on some interval {@code (lo, hi)}, this class + * finds an approximation {@code x} to the point at which the function + * attains its minimum. 
+ * It implements Richard Brent's algorithm (from his book "Algorithms for + * Minimization without Derivatives", p. 79) for finding minima of real + * univariate functions. + * <br/> + * This code is an adaptation, partly based on the Python code from SciPy + * (module "optimize.py" v0.5); the original algorithm is also modified + * <ul> + * <li>to use an initial guess provided by the user,</li> + * <li>to ensure that the best point encountered is the one returned.</li> + * </ul> + * + * @since 2.0 + */ +public class BrentOptimizer extends UnivariateOptimizer { + /** + * Golden section. + */ + private static final double GOLDEN_SECTION = 0.5 * (3 - FastMath.sqrt(5)); + /** + * Minimum relative tolerance. + */ + private static final double MIN_RELATIVE_TOLERANCE = 2 * FastMath.ulp(1d); + /** + * Relative threshold. + */ + private final double relativeThreshold; + /** + * Absolute threshold. + */ + private final double absoluteThreshold; + + /** + * The arguments are used for implementing the original stopping criterion + * of Brent's algorithm. + * {@code abs} and {@code rel} define a tolerance + * {@code tol = rel |x| + abs}. {@code rel} should be no smaller than + * <em>2 macheps</em> and preferably not much less than <em>sqrt(macheps)</em>, + * where <em>macheps</em> is the relative machine precision. {@code abs} must + * be positive. + * + * @param rel Relative threshold. + * @param abs Absolute threshold. + * @param checker Additional, user-defined, convergence checking + * procedure. + * @throws NotStrictlyPositiveException if {@code abs <= 0}. + * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}. 
+ */ + public BrentOptimizer(double rel, + double abs, + ConvergenceChecker<UnivariatePointValuePair> checker) { + super(checker); + + if (rel < MIN_RELATIVE_TOLERANCE) { + throw new NumberIsTooSmallException(rel, MIN_RELATIVE_TOLERANCE, true); + } + if (abs <= 0) { + throw new NotStrictlyPositiveException(abs); + } + + relativeThreshold = rel; + absoluteThreshold = abs; + } + + /** + * The arguments are used for implementing the original stopping criterion + * of Brent's algorithm. + * {@code abs} and {@code rel} define a tolerance + * {@code tol = rel |x| + abs}. {@code rel} should be no smaller than + * <em>2 macheps</em> and preferably not much less than <em>sqrt(macheps)</em>, + * where <em>macheps</em> is the relative machine precision. {@code abs} must + * be positive. + * + * @param rel Relative threshold. + * @param abs Absolute threshold. + * @throws NotStrictlyPositiveException if {@code abs <= 0}. + * @throws NumberIsTooSmallException if {@code rel < 2 * Math.ulp(1d)}. + */ + public BrentOptimizer(double rel, + double abs) { + this(rel, abs, null); + } + + /** {@inheritDoc} */ + @Override + protected UnivariatePointValuePair doOptimize() { + final boolean isMinim = getGoalType() == GoalType.MINIMIZE; + final double lo = getMin(); + final double mid = getStartValue(); + final double hi = getMax(); + + // Optional additional convergence criteria. + final ConvergenceChecker<UnivariatePointValuePair> checker + = getConvergenceChecker(); + + double a; + double b; + if (lo < hi) { + a = lo; + b = hi; + } else { + a = hi; + b = lo; + } + + double x = mid; + double v = x; + double w = x; + double d = 0; + double e = 0; + double fx = computeObjectiveValue(x); + if (!isMinim) { + fx = -fx; + } + double fv = fx; + double fw = fx; + + UnivariatePointValuePair previous = null; + UnivariatePointValuePair current + = new UnivariatePointValuePair(x, isMinim ? fx : -fx); + // Best point encountered so far (which is the initial guess). 
+ UnivariatePointValuePair best = current; + + while (true) { + final double m = 0.5 * (a + b); + final double tol1 = relativeThreshold * FastMath.abs(x) + absoluteThreshold; + final double tol2 = 2 * tol1; + + // Default stopping criterion. + final boolean stop = FastMath.abs(x - m) <= tol2 - 0.5 * (b - a); + if (!stop) { + double p = 0; + double q = 0; + double r = 0; + double u = 0; + + if (FastMath.abs(e) > tol1) { // Fit parabola. + r = (x - w) * (fx - fv); + q = (x - v) * (fx - fw); + p = (x - v) * q - (x - w) * r; + q = 2 * (q - r); + + if (q > 0) { + p = -p; + } else { + q = -q; + } + + r = e; + e = d; + + if (p > q * (a - x) && + p < q * (b - x) && + FastMath.abs(p) < FastMath.abs(0.5 * q * r)) { + // Parabolic interpolation step. + d = p / q; + u = x + d; + + // f must not be evaluated too close to a or b. + if (u - a < tol2 || b - u < tol2) { + if (x <= m) { + d = tol1; + } else { + d = -tol1; + } + } + } else { + // Golden section step. + if (x < m) { + e = b - x; + } else { + e = a - x; + } + d = GOLDEN_SECTION * e; + } + } else { + // Golden section step. + if (x < m) { + e = b - x; + } else { + e = a - x; + } + d = GOLDEN_SECTION * e; + } + + // Update by at least "tol1". + if (FastMath.abs(d) < tol1) { + if (d >= 0) { + u = x + tol1; + } else { + u = x - tol1; + } + } else { + u = x + d; + } + + double fu = computeObjectiveValue(u); + if (!isMinim) { + fu = -fu; + } + + // User-defined convergence checker. + previous = current; + current = new UnivariatePointValuePair(u, isMinim ? fu : -fu); + best = best(best, + best(previous, + current, + isMinim), + isMinim); + + if (checker != null && checker.converged(getIterations(), previous, current)) { + return best; + } + + // Update a, b, v, w and x. 
+ if (fu <= fx) { + if (u < x) { + b = x; + } else { + a = x; + } + v = w; + fv = fw; + w = x; + fw = fx; + x = u; + fx = fu; + } else { + if (u < x) { + a = u; + } else { + b = u; + } + if (fu <= fw || + Precision.equals(w, x)) { + v = w; + fv = fw; + w = u; + fw = fu; + } else if (fu <= fv || + Precision.equals(v, x) || + Precision.equals(v, w)) { + v = u; + fv = fu; + } + } + } else { // Default termination (Brent's criterion). + return best(best, + best(previous, + current, + isMinim), + isMinim); + } + + incrementIterationCount(); + } + } + + /** + * Selects the best of two points. + * + * @param a Point and value. + * @param b Point and value. + * @param isMinim {@code true} if the selected point must be the one with + * the lowest value. + * @return the best point, or {@code null} if {@code a} and {@code b} are + * both {@code null}. When {@code a} and {@code b} have the same function + * value, {@code a} is returned. + */ + private UnivariatePointValuePair best(UnivariatePointValuePair a, + UnivariatePointValuePair b, + boolean isMinim) { + if (a == null) { + return b; + } + if (b == null) { + return a; + } + + if (isMinim) { + return a.getValue() <= b.getValue() ? a : b; + } else { + return a.getValue() >= b.getValue() ? a : b; + } + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/MultiStartUnivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/univariate/MultiStartUnivariateOptimizer.java new file mode 100644 index 0000000..d12ec97 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/MultiStartUnivariateOptimizer.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.univariate; + +import java.util.Arrays; +import java.util.Comparator; +import org.apache.commons.math3.exception.MathIllegalStateException; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.exception.util.LocalizedFormats; +import org.apache.commons.math3.random.RandomGenerator; +import org.apache.commons.math3.optim.MaxEval; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Special implementation of the {@link UnivariateOptimizer} interface + * adding multi-start features to an existing optimizer. + * <br/> + * This class wraps an optimizer in order to use it several times in + * turn with different starting points (trying to avoid being trapped + * in a local extremum when looking for a global one). + * + * @since 3.0 + */ +public class MultiStartUnivariateOptimizer + extends UnivariateOptimizer { + /** Underlying classical optimizer. */ + private final UnivariateOptimizer optimizer; + /** Number of evaluations already performed for all starts. */ + private int totalEvaluations; + /** Number of starts to go. */ + private int starts; + /** Random generator for multi-start. */ + private RandomGenerator generator; + /** Found optima. */ + private UnivariatePointValuePair[] optima; + /** Optimization data. 
*/ + private OptimizationData[] optimData; + /** + * Location in {@link #optimData} where the updated maximum + * number of evaluations will be stored. + */ + private int maxEvalIndex = -1; + /** + * Location in {@link #optimData} where the updated start value + * will be stored. + */ + private int searchIntervalIndex = -1; + + /** + * Create a multi-start optimizer from a single-start optimizer. + * + * @param optimizer Single-start optimizer to wrap. + * @param starts Number of starts to perform. If {@code starts == 1}, + * the {@code optimize} methods will return the same solution as + * {@code optimizer} would. + * @param generator Random generator to use for restarts. + * @throws NotStrictlyPositiveException if {@code starts < 1}. + */ + public MultiStartUnivariateOptimizer(final UnivariateOptimizer optimizer, + final int starts, + final RandomGenerator generator) { + super(optimizer.getConvergenceChecker()); + + if (starts < 1) { + throw new NotStrictlyPositiveException(starts); + } + + this.optimizer = optimizer; + this.starts = starts; + this.generator = generator; + } + + /** {@inheritDoc} */ + @Override + public int getEvaluations() { + return totalEvaluations; + } + + /** + * Gets all the optima found during the last call to {@code optimize}. + * The optimizer stores all the optima found during a set of + * restarts. The {@code optimize} method returns the best point only. + * This method returns all the points found at the end of each start, + * including the best one already returned by the {@code optimize} method. + * <br/> + * The returned array has one element for each start as specified + * in the constructor. It is ordered with the results from the + * runs that did converge first, sorted from best to worst + * objective value (i.e. in ascending order if minimizing and in + * descending order if maximizing), followed by {@code null} elements + * corresponding to the runs that did not converge. 
This means all + * elements will be {@code null} if the {@code optimize} method did throw + * an exception. + * This also means that if the first element is not {@code null}, it is + * the best point found across all starts. + * + * @return an array containing the optima. + * @throws MathIllegalStateException if {@link #optimize(OptimizationData[]) + * optimize} has not been called. + */ + public UnivariatePointValuePair[] getOptima() { + if (optima == null) { + throw new MathIllegalStateException(LocalizedFormats.NO_OPTIMUM_COMPUTED_YET); + } + return optima.clone(); + } + + /** + * {@inheritDoc} + * + * @throws MathIllegalStateException if {@code optData} does not contain an + * instance of {@link MaxEval} or {@link SearchInterval}. + */ + @Override + public UnivariatePointValuePair optimize(OptimizationData... optData) { + // Store arguments in order to pass them to the internal optimizer. + optimData = optData; + // Set up base class and perform computations. + return super.optimize(optData); + } + + /** {@inheritDoc} */ + @Override + protected UnivariatePointValuePair doOptimize() { + // Remove all instances of "MaxEval" and "SearchInterval" from the + // array that will be passed to the internal optimizer. + // The former is to enforce smaller numbers of allowed evaluations + // (according to how many have been used up already), and the latter + // to impose a different start value for each start. 
+ for (int i = 0; i < optimData.length; i++) { + if (optimData[i] instanceof MaxEval) { + optimData[i] = null; + maxEvalIndex = i; + continue; + } + if (optimData[i] instanceof SearchInterval) { + optimData[i] = null; + searchIntervalIndex = i; + continue; + } + } + if (maxEvalIndex == -1) { + throw new MathIllegalStateException(); + } + if (searchIntervalIndex == -1) { + throw new MathIllegalStateException(); + } + + RuntimeException lastException = null; + optima = new UnivariatePointValuePair[starts]; + totalEvaluations = 0; + + final int maxEval = getMaxEvaluations(); + final double min = getMin(); + final double max = getMax(); + final double startValue = getStartValue(); + + // Multi-start loop. + for (int i = 0; i < starts; i++) { + // CHECKSTYLE: stop IllegalCatch + try { + // Decrease number of allowed evaluations. + optimData[maxEvalIndex] = new MaxEval(maxEval - totalEvaluations); + // New start value. + final double s = (i == 0) ? + startValue : + min + generator.nextDouble() * (max - min); + optimData[searchIntervalIndex] = new SearchInterval(min, max, s); + // Optimize. + optima[i] = optimizer.optimize(optimData); + } catch (RuntimeException mue) { + lastException = mue; + optima[i] = null; + } + // CHECKSTYLE: resume IllegalCatch + + totalEvaluations += optimizer.getEvaluations(); + } + + sortPairs(getGoalType()); + + if (optima[0] == null) { + throw lastException; // Cannot be null if starts >= 1. + } + + // Return the point with the best objective function value. + return optima[0]; + }

    /**
     * Orders {@code optima} in place so that the best result comes first and
     * every {@code null} entry is placed after all non-null entries.
     *
     * @param goal Goal type. With {@link GoalType#MINIMIZE} the smallest
     * objective value ranks first; otherwise the largest one does.
     */
    private void sortPairs(final GoalType goal) {
        final Comparator<UnivariatePointValuePair> bestFirst =
            new Comparator<UnivariatePointValuePair>() {
                /** {@inheritDoc} */
                public int compare(final UnivariatePointValuePair o1,
                                   final UnivariatePointValuePair o2) {
                    // Missing results (failed starts) always sort last.
                    if (o1 == null && o2 == null) {
                        return 0;
                    }
                    if (o1 == null) {
                        return 1;
                    }
                    if (o2 == null) {
                        return -1;
                    }

                    final double first = o1.getValue();
                    final double second = o2.getValue();
                    if (goal == GoalType.MINIMIZE) {
                        return Double.compare(first, second);
                    }
                    // Swapping the arguments reverses the order.
                    return Double.compare(second, first);
                }
            };

        Arrays.sort(optima, bestFirst);
    }
}
diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/SearchInterval.java b/src/main/java/org/apache/commons/math3/optim/univariate/SearchInterval.java new file mode 100644 index 0000000..fa80e64 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/SearchInterval.java @@ -0,0 +1,95 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.optim.univariate;

import org.apache.commons.math3.optim.OptimizationData;
import org.apache.commons.math3.exception.NumberIsTooLargeException;
import org.apache.commons.math3.exception.OutOfRangeException;

/**
 * Optimization data holding the interval to be searched together with a
 * start value located inside that interval.
 * <br/>
 * Immutable class.
 *
 * @since 3.1
 */
public class SearchInterval implements OptimizationData {
    /** Lower bound. */
    private final double lower;
    /** Upper bound. */
    private final double upper;
    /** Start value. */
    private final double start;

    /**
     * Creates an interval with an explicit start value.
     *
     * @param lo Lower bound.
     * @param hi Upper bound.
     * @param init Start value.
     * @throws NumberIsTooLargeException if {@code lo >= hi}.
     * @throws OutOfRangeException if {@code init < lo} or {@code init > hi}.
     */
    public SearchInterval(double lo,
                          double hi,
                          double init) {
        // The bounds are validated before the start value.
        if (lo >= hi) {
            throw new NumberIsTooLargeException(lo, hi, false);
        }

        final boolean belowLower = init < lo;
        final boolean aboveUpper = init > hi;
        if (belowLower || aboveUpper) {
            throw new OutOfRangeException(init, lo, hi);
        }

        this.lower = lo;
        this.upper = hi;
        this.start = init;
    }

    /**
     * Creates an interval whose start value is the midpoint
     * {@code 0.5 * (lo + hi)}.
     *
     * @param lo Lower bound.
     * @param hi Upper bound.
     * @throws NumberIsTooLargeException if {@code lo >= hi}.
     */
    public SearchInterval(double lo,
                          double hi) {
        this(lo, hi, 0.5 * (lo + hi));
    }

    /**
     * Gets the lower bound.
     *
     * @return the lower bound.
     */
    public double getMin() {
        return this.lower;
    }

    /**
     * Gets the upper bound.
     *
     * @return the upper bound.
     */
    public double getMax() {
        return this.upper;
    }

    /**
     * Gets the start value.
     *
     * @return the start value.
     */
    public double getStartValue() {
        return this.start;
    }
}
diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/SimpleUnivariateValueChecker.java b/src/main/java/org/apache/commons/math3/optim/univariate/SimpleUnivariateValueChecker.java new file mode 100644 index 0000000..58cc521 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/SimpleUnivariateValueChecker.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.univariate; + +import org.apache.commons.math3.util.FastMath; +import org.apache.commons.math3.exception.NotStrictlyPositiveException; +import org.apache.commons.math3.optim.AbstractConvergenceChecker; + +/** + * Simple implementation of the + * {@link org.apache.commons.math3.optimization.ConvergenceChecker} interface + * that uses only objective function values. + * + * Convergence is considered to have been reached if either the relative + * difference between the objective function values is smaller than a + * threshold or if either the absolute difference between the objective + * function values is smaller than another threshold. + * <br/> + * The {@link #converged(int,UnivariatePointValuePair,UnivariatePointValuePair) + * converged} method will also return {@code true} if the number of iterations + * has been set (see {@link #SimpleUnivariateValueChecker(double,double,int) + * this constructor}). + * + * @since 3.1 + */ +public class SimpleUnivariateValueChecker + extends AbstractConvergenceChecker<UnivariatePointValuePair> { + /** + * If {@link #maxIterationCount} is set to this value, the number of + * iterations will never cause + * {@link #converged(int,UnivariatePointValuePair,UnivariatePointValuePair)} + * to return {@code true}. + */ + private static final int ITERATION_CHECK_DISABLED = -1; + /** + * Number of iterations after which the + * {@link #converged(int,UnivariatePointValuePair,UnivariatePointValuePair)} + * method will return true (unless the check is disabled). 
+ */ + private final int maxIterationCount; + + /** Build an instance with specified thresholds. + * + * In order to perform only relative checks, the absolute tolerance + * must be set to a negative value. In order to perform only absolute + * checks, the relative tolerance must be set to a negative value. + * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + */ + public SimpleUnivariateValueChecker(final double relativeThreshold, + final double absoluteThreshold) { + super(relativeThreshold, absoluteThreshold); + maxIterationCount = ITERATION_CHECK_DISABLED; + } + + /** + * Builds an instance with specified thresholds. + * + * In order to perform only relative checks, the absolute tolerance + * must be set to a negative value. In order to perform only absolute + * checks, the relative tolerance must be set to a negative value. + * + * @param relativeThreshold relative tolerance threshold + * @param absoluteThreshold absolute tolerance threshold + * @param maxIter Maximum iteration count. + * @throws NotStrictlyPositiveException if {@code maxIter <= 0}. + * + * @since 3.1 + */ + public SimpleUnivariateValueChecker(final double relativeThreshold, + final double absoluteThreshold, + final int maxIter) { + super(relativeThreshold, absoluteThreshold); + + if (maxIter <= 0) { + throw new NotStrictlyPositiveException(maxIter); + } + maxIterationCount = maxIter; + } + + /** + * Check if the optimization algorithm has converged considering the + * last two points. + * This method may be called several time from the same algorithm + * iteration with different points. This can be detected by checking the + * iteration number at each call if needed. Each time this method is + * called, the previous and current point correspond to points with the + * same role at each iteration, so they can be compared. 
As an example, + * simplex-based algorithms call this method for all points of the simplex, + * not only for the best or worst ones. + * + * @param iteration Index of current iteration + * @param previous Best point in the previous iteration. + * @param current Best point in the current iteration. + * @return {@code true} if the algorithm has converged. + */ + @Override + public boolean converged(final int iteration, + final UnivariatePointValuePair previous, + final UnivariatePointValuePair current) { + if (maxIterationCount != ITERATION_CHECK_DISABLED && iteration >= maxIterationCount) { + return true; + } + + final double p = previous.getValue(); + final double c = current.getValue(); + final double difference = FastMath.abs(p - c); + final double size = FastMath.max(FastMath.abs(p), FastMath.abs(c)); + return difference <= size * getRelativeThreshold() || + difference <= getAbsoluteThreshold(); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateObjectiveFunction.java b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateObjectiveFunction.java new file mode 100644 index 0000000..ad06d84 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateObjectiveFunction.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math3.optim.univariate; + +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.optim.OptimizationData; + +/** + * Scalar function to be optimized. + * + * @since 3.1 + */ +public class UnivariateObjectiveFunction implements OptimizationData { + /** Function to be optimized. */ + private final UnivariateFunction function; + + /** + * @param f Function to be optimized. + */ + public UnivariateObjectiveFunction(UnivariateFunction f) { + function = f; + } + + /** + * Gets the function to be optimized. + * + * @return the objective function. + */ + public UnivariateFunction getObjectiveFunction() { + return function; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateOptimizer.java b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateOptimizer.java new file mode 100644 index 0000000..a7512c1 --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariateOptimizer.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.math3.optim.univariate; + +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.optim.BaseOptimizer; +import org.apache.commons.math3.optim.OptimizationData; +import org.apache.commons.math3.optim.nonlinear.scalar.GoalType; +import org.apache.commons.math3.optim.ConvergenceChecker; +import org.apache.commons.math3.exception.TooManyEvaluationsException; + +/** + * Base class for a univariate scalar function optimizer. + * + * @since 3.1 + */ +public abstract class UnivariateOptimizer + extends BaseOptimizer<UnivariatePointValuePair> { + /** Objective function. */ + private UnivariateFunction function; + /** Type of optimization. */ + private GoalType goal; + /** Initial guess. */ + private double start; + /** Lower bound. */ + private double min; + /** Upper bound. */ + private double max; + + /** + * @param checker Convergence checker. + */ + protected UnivariateOptimizer(ConvergenceChecker<UnivariatePointValuePair> checker) { + super(checker); + } + + /** + * {@inheritDoc} + * + * @param optData Optimization data. In addition to those documented in + * {@link BaseOptimizer#parseOptimizationData(OptimizationData[]) + * BaseOptimizer}, this method will register the following data: + * <ul> + * <li>{@link GoalType}</li> + * <li>{@link SearchInterval}</li> + * <li>{@link UnivariateObjectiveFunction}</li> + * </ul> + * @return {@inheritDoc} + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. + */ + @Override + public UnivariatePointValuePair optimize(OptimizationData... optData) + throws TooManyEvaluationsException { + // Perform computation. + return super.optimize(optData); + } + + /** + * @return the optimization type. + */ + public GoalType getGoalType() { + return goal; + } + + /** + * Scans the list of (required and optional) optimization data that + * characterize the problem. + * + * @param optData Optimization data. 
+ * The following data will be looked for: + * <ul> + * <li>{@link GoalType}</li> + * <li>{@link SearchInterval}</li> + * <li>{@link UnivariateObjectiveFunction}</li> + * </ul> + */ + @Override + protected void parseOptimizationData(OptimizationData... optData) { + // Allow base class to register its own data. + super.parseOptimizationData(optData); + + // The existing values (as set by the previous call) are reused if + // not provided in the argument list. + for (OptimizationData data : optData) { + if (data instanceof SearchInterval) { + final SearchInterval interval = (SearchInterval) data; + min = interval.getMin(); + max = interval.getMax(); + start = interval.getStartValue(); + continue; + } + if (data instanceof UnivariateObjectiveFunction) { + function = ((UnivariateObjectiveFunction) data).getObjectiveFunction(); + continue; + } + if (data instanceof GoalType) { + goal = (GoalType) data; + continue; + } + } + } + + /** + * @return the initial guess. + */ + public double getStartValue() { + return start; + } + /** + * @return the lower bounds. + */ + public double getMin() { + return min; + } + /** + * @return the upper bounds. + */ + public double getMax() { + return max; + } + + /** + * Computes the objective function value. + * This method <em>must</em> be called by subclasses to enforce the + * evaluation counter limit. + * + * @param x Point at which the objective function must be evaluated. + * @return the objective function value at the specified point. + * @throws TooManyEvaluationsException if the maximal number of + * evaluations is exceeded. 
+ */ + protected double computeObjectiveValue(double x) { + super.incrementEvaluationCount(); + return function.value(x); + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/UnivariatePointValuePair.java b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariatePointValuePair.java new file mode 100644 index 0000000..6b2b51a --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/UnivariatePointValuePair.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.math3.optim.univariate; + +import java.io.Serializable; + +/** + * This class holds a point and the value of an objective function at this + * point. + * This is a simple immutable container. + * + * @since 3.0 + */ +public class UnivariatePointValuePair implements Serializable { + /** Serializable version identifier. */ + private static final long serialVersionUID = 1003888396256744753L; + /** Point. */ + private final double point; + /** Value of the objective function at the point. */ + private final double value; + + /** + * Build a point/objective function value pair. + * + * @param point Point. 
+ * @param value Value of an objective function at the point + */ + public UnivariatePointValuePair(final double point, + final double value) { + this.point = point; + this.value = value; + } + + /** + * Get the point. + * + * @return the point. + */ + public double getPoint() { + return point; + } + + /** + * Get the value of the objective function. + * + * @return the stored value of the objective function. + */ + public double getValue() { + return value; + } +} diff --git a/src/main/java/org/apache/commons/math3/optim/univariate/package-info.java b/src/main/java/org/apache/commons/math3/optim/univariate/package-info.java new file mode 100644 index 0000000..2273bab --- /dev/null +++ b/src/main/java/org/apache/commons/math3/optim/univariate/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * One-dimensional optimization algorithms. + */ +package org.apache.commons.math3.optim.univariate; |