/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math3.ml.clustering; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.math3.exception.NotPositiveException; import org.apache.commons.math3.exception.NullArgumentException; import org.apache.commons.math3.ml.distance.DistanceMeasure; import org.apache.commons.math3.ml.distance.EuclideanDistance; import org.apache.commons.math3.util.MathUtils; /** * DBSCAN (density-based spatial clustering of applications with noise) algorithm. *
* The DBSCAN algorithm forms clusters based on the idea of density connectivity, i.e. * a point p is density connected to another point q, if there exists a chain of * points pi, with i = 1 .. n and p1 = p and pn = q, * such that each pair <pi, pi+1> is directly density-reachable. * A point q is directly density-reachable from point p if it is in the ε-neighborhood * of this point. *
* Any point that is not density-reachable from a formed cluster is treated as noise, and * will thus not be present in the result. *
* The algorithm requires two parameters: *
* The euclidean distance will be used as default distance measure.
*
* @param eps maximum radius of the neighborhood to be considered
* @param minPts minimum number of points needed for a cluster
* @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
*/
public DBSCANClusterer(final double eps, final int minPts)
throws NotPositiveException {
this(eps, minPts, new EuclideanDistance());
}
/**
* Creates a new instance of a DBSCANClusterer.
*
* @param eps maximum radius of the neighborhood to be considered
* @param minPts minimum number of points needed for a cluster
* @param measure the distance measure to use
* @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0}
*/
public DBSCANClusterer(final double eps, final int minPts, final DistanceMeasure measure)
throws NotPositiveException {
super(measure);
if (eps < 0.0d) {
throw new NotPositiveException(eps);
}
if (minPts < 0) {
throw new NotPositiveException(minPts);
}
this.eps = eps;
this.minPts = minPts;
}
/**
* Returns the maximum radius of the neighborhood to be considered.
* @return maximum radius of the neighborhood
*/
public double getEps() {
return eps;
}
/**
* Returns the minimum number of points needed for a cluster.
* @return minimum number of points needed for a cluster
*/
public int getMinPts() {
return minPts;
}
/**
* Performs DBSCAN cluster analysis.
*
* @param points the points to cluster
* @return the list of clusters
* @throws NullArgumentException if the data points are null
*/
@Override
public List