1
// Scalar types stored as the mapped values of the term maps below.
using Count = unsigned long;
using Weight = float;

// Per-document maps keyed by a string (the term), holding a Count or a Weight.
using StringToCountMap = std::map<std::string, Count>;
using StringToWeightMap = std::map<std::string, Weight>;

// Outer maps keyed by an unsigned long id, each holding one per-document term map.
using UnsignedToStringToCountMap = std::map<unsigned long, StringToCountMap>;
using UnsignedToStringToWeightMap = std::map<unsigned long, StringToWeightMap>;

// Maps an unsigned long id to a std::size_t cluster index.
using ClustersMap = std::map<unsigned long, std::size_t>;


/// Groups the documents of a weighted term-vector collection into clusters,
/// keeping one centroid per cluster and a document-id => cluster-id mapping.
///
/// Only the declarations are visible here; the notes on the private helpers
/// below are derived from their names and signatures and should be confirmed
/// against their definitions.
class DefaultClusteringAlgorithm
{
public:
    /// Minimum number of documents changing clusters for the algorithm to end.
    static const unsigned DocumentChangeThreshold = 0;

    /// @param numClusters      Number of clusters requested.
    /// @param documentVectors  Document id => (term => weight) vectors. Held by
    ///                         reference, not copied: the caller must keep the
    ///                         map alive for as long as this object is used.
    DefaultClusteringAlgorithm(unsigned numClusters, const UnsignedToStringToWeightMap &documentVectors)
        : numClusters_(numClusters),
          documentVectors_(documentVectors)
    {
    }

    ~DefaultClusteringAlgorithm() {}

    /// Runs the clustering and returns a reference to a ClustersMap
    /// (presumably the internal id => cluster-id mapping -- confirm).
    const ClustersMap &DoClustering();

private:
    // Clustering steps.
    void ChooseInitialCentroids();
    unsigned ClusterOnCentroids(); // NOTE(review): presumably returns how many documents moved -- confirm
    void RecalculateCentroids();

    // Vector helpers operating on term => weight maps.
    float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
    float DocumentLength(const StringToWeightMap &document);

    // Number of clusters requested at construction.
    unsigned numClusters_;

    // Centroids, indexed by cluster id.
    std::vector<StringToWeightMap> centroids_;

    // Question (document) id => cluster id.
    ClustersMap clusters_;

    // Input document vectors (non-owning reference).
    const UnsignedToStringToWeightMap &documentVectors_;
};

void DefaultClusteringAlgorithm::RecalculateCentroids()
{
    std::vector<unsigned> newCentroidsSizes(centroids_.size());
    std::vector<StringToWeightMap> newCentroids(centroids_.size());

    ClustersMap::const_iterator clusterMapping = clusters_.begin();

    for (; clusterMapping != clusters_.end(); ++clusterMapping)
    {
        std::size_t clusterId = clusterMapping->second;

        ++newCentroidsSizes[clusterId];
        const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);

        StringToWeightMap::const_iterator termWeight = document.cbegin();

        for (; termWeight != document.end(); ++termWeight);
        {
            newCentroids[clusterId][termWeight->first] += termWeight->second;
        }
    }

    std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();

    for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
    {
        std::size_t clusterId = centroidSize - newCentroidsSizes.begin();

        StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();

        for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
        {
            centroidTermWeight->second /= *centroidSize;
        }
    }
}

デバッガウォッチ

const_iterator termWeight の作成時に問題が発生します。

StringToWeightMap::const_iterator termWeight = document.begin();

上の画像でわかるように、const_iterator である termWeight には無効なデータが入っています。しかし、const std::map である document 自体は完全に有効な std::map です。なぜこのようなことが起こるのか、理由が思い当たりません。

最近 std::map::cbegin() が存在することを知りました。代わりにその方法を使用する必要がありますか?

編集: より多くのコンテキスト（コード全体）を追加しました。

4

2 件の回答