python - Pythonを使用したkmeans実装の空のリスト

Question

クラスターを使用してオブジェクトを識別するプロジェクトの K-Means クラスターを作成しているため、ロボットは自由にほぼ自律的です。カメラは基本的に、0.5 秒の
速度で写真をキャプチャします。これは、ピクセルの「ブロブ」に保存されます。このブロブはデータマイニングアルゴリズムである k-means に送信され、オブジェクトの「色合い」をクラスターとして識別するため、ロボットはそれらの領域を回避するようにプログラムできます。k-means コードを投稿します。それはpythonで書かれています。

import sys, math, random

class Point:

    def __init__(self, coords, reference=None):
        self.coords     = coords
        self.n          = len(coords)
        self.reference  = reference

    def __repr__(self):
        return str(self.coords)

class Cluster:

    def __init__(self, points):

        if len(points) == 0: 
            raise Exception("ILLEGAL: empty cluster")

        self.points = points
        self.n      = points[0].n       # make the first element to be the number of clusters

        for p in points:
            if p.n != self.n:
                raise Exception("ILLEGAL: wrong dimension")

        self.centroid = self.calculateCentroid()

    def __repr__(self):
        return str(self.points)

    def update(self, points):
        old_centroid    = self.centroid
        self.points     = points
        self.centroid   = self.calculateCentroid()
        return getDistance(old_centroid, self.centroid)

    def calculateCentroid(self):
        reduce_coord = lambda i:reduce(lambda x,p : x + p.coords[i], self.points, 0.0)
        if len(self.points) == 0:
            print "Dividing by 0"
            self.points = [1]
        centroid_coords = [reduce_coord(i) / len(self.points) for i in range(self.n)]

        return Point(centroid_coords)

def kmeans(points, k, cutoff):

    initial = random.sample(points, k)
    clusters = [Cluster([p]) for p in initial]
    print clusters
    while True:
        lists = [ [] for c in clusters]
        for p in points:
            smallest_distance   = getDistance(p, clusters[0].centroid)
            index = 0

            for i in range(len(clusters[1:])):
                distance = getDistance(p, clusters[i+1].centroid)
                if distance < smallest_distance:

                    smallest_distance = distance
                    index = i+1

                lists[index].append(p)
            biggest_shift = 0.0

            for i in range(len(clusters)):
                shift = clusters[i].update(lists[i])
                biggest_shift = max(biggest_shift, shift)

            if biggest_shift < cutoff:
                break

    return clusters

def getDistance(a, b):

    if a.n != b.n:
        raise Exception("ILLEGAL: non comparable points")

    ret = reduce(lambda x, y: x + pow((a.coords[y] - b.coords[y]), 2), range(a.n), 0.0)
    return math.sqrt(ret)

def makeRandomPoint(n, lower, upper):
    return Point([random.uniform(lower, upper) for i in range(n)])

def main():
    num_points, dim, k, cutoff, lower, upper = 10, 2, 3, 0.5, 0, 200
    points = map(lambda i: makeRandomPoint(dim, lower, upper), range(num_points))

    clusters = kmeans(points, k, cutoff)

    for i, c in enumerate(clusters):
        for p in c.points:
            print "Cluster: ", i, "\t Point: ", p


if __name__ == "__main__":
    main()

案の定、それは機能していません！

  Traceback (most recent call last):
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 100, in ?
    main()    
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 92, in main
[    clusters = kmeans(points, k, cutoff)
[[89.152748179548524, 81.217634455465131]], [[83.439023369838509, 169.75355953688432]], [[1.8622622156419633, 41.364078271733739]]]
Dividing by 0
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 69, in kmeans
    shift = clusters[i].update(lists[i])
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 35, in update
    self.centroid   = self.calculateCentroid()
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 43, in calculateCentroid
    centroid_coords = [reduce_coord(i) / len(self.points) for i in range(self.n)]
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 39, in <lambda>
    reduce_coord = lambda i:reduce(lambda x,p : x + p.coords[i], self.points, 0.0)
  File "C:\Users\philippe\Documents\workspace-sts-2.7.2.RELEASE\scribber\kmeans\kmeans.py", line 39, in <lambda>
    reduce_coord = lambda i:reduce(lambda x,p : x + p.coords[i], self.points, 0.0)
AttributeError: 'int' object has no attribute 'coords'

lists関数でプリントインすると、kmeans(points, k, cutoff)が得られ [[], [], []]ます。私はそれを理解しようとしていますが、なぜ空のリストが返されるのですか。コード全体を投稿したので、コードを実行してエラーを再現できます。エラーログでは、「clusters」とは何か、つまりポイントのリストを確認できます。ありがとう

score 1 · Accepted Answer

問題は、特定のクラスターに最も近いポイントのリストが空の場合 (すべてのポイントが別のクラスターに近い場合)、0 による除算エラーが発生し、その時点でガベージデータを self.points に割り当てることです。これにより、表示されている最終的なエラーが発生します。

これは、2 つのクラスターが同じ重心を持つ場合に可能です。この場合、2 番目のクラスターにポイントが割り当てられることはありません。

ちなみに、もう一つバグがあります。list[index].append(p) の前に余分なインデントがありますとにかくきれいにするために enumerate と min を使用してループ全体を書き直すことを検討する必要があります。

これが私が物事を書き直すことを提案する方法です。

while True:
    newPoints = dict([(c,[]) for c in clusters])
    for p in points:
        cluster = min(clusters, key = lambda c:getDistance(p, c.centroid))
        newPoints[cluster].append(p)

    biggest_shift = 0.0

    for c in clusters:
        if newPoints[c]:
            shift = c.update(newPoints[c])
            biggest_shift = max(biggest_shift, shift)

    if biggest_shift < cutoff:
        break

python - Pythonを使用したkmeans実装の空のリスト

1 に答える 1

Related

Reference