ポイントのセットをクラスタ化する DBSCAN アルゴリズムを作成しようとしていますが、得られる結果は非常に悪いものです。これはデータのせいかもしれませんが、それだけではありません。発生してはならないサイズ < minPoints のクラスターを取得しています。
私は何を間違っていますか?コードを何度も調べましたが、何が問題なのかわかりません。
DBSCAN ウィキペディアのページに記載されているアルゴリズムを参照しました。
private static int[] dbScan(String[] points, int epsilon, int minPts) {
int cluster = 0;
// visited stores if point has been visited
boolean[] visited = new boolean[points.length];
// pointsCluster stores which cluster a point has been assigned to
int[] pointsCluster = new int[points.length];
for(int iii = 0; iii < points.length; iii++) {
// if point iii is already visited, do nothing
if(visited[iii]) continue;
visited[iii] = true; // mark point iii as visited
// get points in neighborhood of point iii
HashSet<Integer> neighbors = epsilonNeighbors(points, iii, epsilon);
if(neighbors.size() < minPts) {
// if number of neighbors < minPts, mark point iii as noise
pointsCluster[iii] = -1;
} else {
++cluster; // else, start new cluster
expandCluster(points, iii, neighbors, pointsCluster, visited, cluster, epsilon, minPts);
}
}
return pointsCluster;
}
/*
* Expands a cluster if a point is not a noise point
* and has > minPts in its epsilon neighborhood
*/
private static void expandCluster(String[] points, int seedPoint, HashSet<Integer> neighbors,
int[] pointsCluster, boolean[] visited, int cluster, int epsilon, int minPts) {
pointsCluster[seedPoint] = cluster; //assign cluster to seed point
// create queue to process neighbors
Queue<Integer> seeds = new LinkedList<Integer>();
seeds.addAll(neighbors);
while(!seeds.isEmpty()) {
int currentPoint = (Integer) seeds.poll();
if(!visited[currentPoint]) {
visited[currentPoint] = true; // mark neighbor as visited
// get neighbors of this currentPoint
HashSet<Integer> currentNeighbors = epsilonNeighbors(points, currentPoint, epsilon);
// if currentPoint has >= minPts in neighborhood, add those points to the queue
if(currentNeighbors.size() >= minPts) {
seeds.addAll(currentNeighbors);
}
}
// if currentPoint has not been assigned a cluster, assign it to the current cluster
if(pointsCluster[currentPoint] == 0) pointsCluster[currentPoint] = cluster;
}
}
/*
* Returns a HashSet containing the indexes of points which are
* in the epsilon neighborhood of the point at index == currentPoint
*/
private static HashSet<Integer> epsilonNeighbors(String[] points, int currentPoint, int epsilon) {
HashSet<Integer> neighbors = new HashSet<Integer>();
String protein = points[currentPoint];
for(int iii = 0; iii < points.length; iii++) {
int score = similarity(points[iii], points[jjj]);
if(score >= epsilon) neighbors.add(iii);
}
return neighbors;
}