c++ - C＃HashSetのような高速C++コンテナと辞書？

Question

私はC＃でHashSetとDictionaryをよく使用しましたが、非常に高速であることがわかりました...

std::mapとstd::hash_mapを使用してみましたが、比較すると非常に遅いことがわかりました。これは想定される動作でしょうか？std::hash_mapの使い方で何か間違えている可能性はありますか？

または、より良いC ++ハッシュコンテナがありますか？

私はint32をハッシュしています。通常はそのうちの約100,000です。

更新：C＃とC++で再現を作成しました。2つのトライアルを実行します。C＃では19ミリ秒と13ミリ秒、C++では約11,000ミリ秒かかります。私のC++コードに本当に何か問題があるに違いありません:)

（どちらもリリースビルドとして実行されました。どちらもコンソールアプリです）

C＃出力：

Found 511 values in the intersection, in 19 ms
Found 508 values in the intersection, in 13 ms

C ++出力：

Found 308 values in the intersection, in 11764.7ms
Found 316 values in the intersection, in 11742.8ms

C ++出力（std::mapの代わりにstdext::hash_mapを使用）

Found 300 values in the intersection, in 383.552ms
Found 306 values in the intersection, in 2277.02ms

C ++出力（stdext :: hash_map、リリースx64ビルドを使用）

Found 292 values in the intersection, in 1037.67ms
Found 302 values in the intersection, in 3663.71ms

ノート：

Set2は、C++では意図したとおりに生成されていません。（C＃の場合と同様に）Set1と50％交差することを期待していましたが、何らかの理由で、部分的にでも交差しない値を得るには乱数を10倍する必要がありました。

C＃：

    // Runs the intersection benchmark twice and prints, for each run, the number
    // of matches and the elapsed wall-clock time in milliseconds. Waits for a key
    // press before exiting so the console window stays open.
    static void Main(string[] args)
    {
        // DateTime.Now.Millisecond is only the 0-999 millisecond component of the
        // current time, so the difference of two readings is not a duration (it
        // wraps every second and can be negative). Stopwatch measures the real
        // elapsed interval.
        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
        int intersectionSize = runIntersectionTest();
        stopwatch.Stop();
        long duration = stopwatch.ElapsedMilliseconds;

        Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, duration));

        // Reset() + Start() instead of Restart() so this also builds on .NET 3.5.
        stopwatch.Reset();
        stopwatch.Start();
        intersectionSize = runIntersectionTest();
        stopwatch.Stop();
        duration = stopwatch.ElapsedMilliseconds;

        Console.WriteLine(String.Format("Found {0} values in the intersection, in {1} ms", intersectionSize, duration));

        Console.ReadKey();
    }

    // Benchmark body: fills a Dictionary with 100,000 consecutive keys, probes it
    // with 1,000 random keys, and returns how many probes found a key.
    static int runIntersectionTest()
    {
        const int baseValue = 1000000000;

        Random random = new Random(DateTime.Now.Millisecond);
        Dictionary<int,int> theMap = new Dictionary<int,int>();
        List<int> set1 = new List<int>();
        List<int> set2 = new List<int>();

        // set1: the 100,000 consecutive integers starting at baseValue
        int next = 0;
        while ( next < 100000 )
        {
            set1.Add(baseValue + next);
            next++;
        }

        // set2: 1,000 random integers in baseValue + [1, 200000]
        for ( int drawn = 0; drawn < 1000; drawn++ )
        {
            int offset = random.Next() % 200000 + 1;
            set2.Add(baseValue + offset);
        }

        // Register every set1 member in the dictionary, tagged with 1
        for ( int index = 0; index < set1.Count; index++ )
        {
            theMap[set1[index]] = 1;
        }

        // Probe with set2; every hit is counted and re-tagged with 2
        int matches = 0;
        for ( int index = 0; index < set2.Count; index++ )
        {
            int candidate = set2[index];
            if ( !theMap.ContainsKey(candidate) )
            {
                continue;
            }

            matches++;
            theMap[candidate] = 2;
        }

        return matches;
    }

C ++：

// Benchmark body: loads 100,000 consecutive integers into a std::map, then
// looks up 1,000 rand()-derived integers and returns how many were found.
// The sequence of rand() calls (one per set2 element) is part of the behavior.
int runIntersectionTest()
{
    const int base = 1000000000;

    std::map<int,int> theMap;
    std::vector<int> set1;
    std::vector<int> set2;

    // set1 holds the 100,000 consecutive integers base .. base + 99,999
    for ( int offset = 0; offset != 100000; ++offset )
        set1.push_back(base + offset);

    // set2 holds 1,000 values base + 10 * r, where r = rand() % 200000 + 1
    for ( int drawn = 0; drawn != 1000; ++drawn )
    {
        const int scaled = (rand() % 200000 + 1) * 10;
        set2.push_back(base + scaled);
    }

    typedef std::vector<int>::size_type Index;

    // Load every set1 element into the map, tagged with 1
    for ( Index i = 0; i < set1.size(); ++i )
        theMap[set1[i]] = 1;

    // Look each set2 element up; a hit is re-tagged with 2 and counted
    int hits = 0;
    for ( Index i = 0; i < set2.size(); ++i )
    {
        const int candidate = set2[i];
        if ( theMap.find(candidate) == theMap.end() )
            continue;

        theMap[candidate] = 2;
        ++hits;
    }

    return hits;
}

// Entry point: seeds rand(), runs the intersection benchmark twice and prints
// the match count and the timer reading for each run, then waits for a key.
// NOTE(review): Timer is not defined in this snippet; the code below presumably
// relies on it starting at construction and restarting on Reset() — confirm.
int _tmain(int argc, _TCHAR* argv[])
{
    // Seed the C random generator from the current time
    srand ( time(NULL) );

    // First run: timed from the Timer's construction to Stop()
    Timer timer;
    int intersectionSize = runIntersectionTest();
    timer.Stop();

    cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;

    // Second run: same timer object, reset before the call
    timer.Reset();
    intersectionSize = runIntersectionTest();
    timer.Stop();

    cout << "Found " << intersectionSize << " values in the intersection, in " << timer.GetMilliseconds() << "ms" << endl;

    // Block until a character is read from stdin
    getchar();

    return 0;
}

score 5 · Accepted Answer

hash_mapとhash_setは非標準であり、unordered_mapとunordered_setが近いうちに標準版になる可能性が最も高いです。再現コードがなければ、これ以上話は進まないと思います。内部的にはこれらは同じデータ構造であるため、同程度のパフォーマンスが得られるはずです。

提供されたサンプルを MS Visual Studio 2008 v9.0.30729.1 で Visual C++ -> Win32 -> Console Application としてコンパイルしました（ただし、あなたが何を使用しているのかわからなかったため、Timerクラスは自作しました）。デバッグビルドでは実行時間が約1000ミリ秒でしたが、リリースビルドでは50ミリ秒でした。

#include <vector>
#include <iostream>
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <windows.h>

// Pair of raw performance-counter readings that delimit one timed interval.
struct stopWatch {
    LARGE_INTEGER start;   // reading stored by CStopWatch::startTimer()
    LARGE_INTEGER stop;    // reading stored by CStopWatch::stopTimer()
};

class CStopWatch {

private:
    stopWatch timer;
    LARGE_INTEGER frequency;
    double LIToSecs( LARGE_INTEGER & L);
public:
    CStopWatch();
    void startTimer( );
    void stopTimer( );
    double getElapsedTime();
};

double CStopWatch::LIToSecs( LARGE_INTEGER & L) {
    return ((double)L.QuadPart /(double)frequency.QuadPart) ;
}

CStopWatch::CStopWatch(){
    timer.start.QuadPart=0;
    timer.stop.QuadPart=0;
    QueryPerformanceFrequency( &frequency ) ;
}

void CStopWatch::startTimer( ) {
    QueryPerformanceCounter(&timer.start) ;
}

void CStopWatch::stopTimer( ) {
    QueryPerformanceCounter(&timer.stop) ;
}

double CStopWatch::getElapsedTime() {
    LARGE_INTEGER time;
    time.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
    return LIToSecs( time) ;
}

using namespace std;
// Benchmark body: inserts 100,000 consecutive integers into a std::map, then
// searches it for 1,000 rand()-derived integers and returns the number found.
// rand() is called exactly once per set2 element.
int runIntersectionTest()
{
    typedef std::vector<int> IntList;
    typedef std::map<int,int> IntMap;

    IntMap theMap;
    IntList set1;
    IntList set2;

    // set1: 1000000000, 1000000001, ... (100,000 values)
    int produced = 0;
    while ( produced < 100000 )
    {
        set1.push_back(1000000000 + produced);
        ++produced;
    }

    // set2: 1,000 values, each 1000000000 plus ten times (rand() % 200000 + 1)
    for ( int remaining = 1000; remaining > 0; --remaining )
    {
        int step = rand() % 200000 + 1;
        step *= 10;
        set2.push_back(1000000000 + step);
    }

    // Populate the map from set1; every key is tagged with 1
    for ( IntList::const_iterator src = set1.begin(); src != set1.end(); ++src )
        theMap[*src] = 1;

    // Probe the map with set2; hits are re-tagged with 2 and counted
    int intersectionSize = 0;
    for ( IntList::const_iterator probe = set2.begin(); probe != set2.end(); ++probe )
    {
        const bool present = theMap.find(*probe) != theMap.end();
        if ( present )
        {
            theMap[*probe] = 2;
            ++intersectionSize;
        }
    }

    return intersectionSize;
}

// Entry point: seeds rand(), runs the intersection benchmark twice and prints
// the match count and the elapsed time in seconds for each run, then waits for
// a character on stdin so the console window stays open.
int main(int argc, char* argv[])
{
    // Explicit cast: srand takes unsigned int, time() returns time_t.
    srand ( static_cast<unsigned int>( time(NULL) ) );
    int tests = 2;
    while(tests--){
      // A fresh stopwatch per run, so each measurement is independent.
      CStopWatch timer;
      timer.startTimer();
      int intersectionSize = runIntersectionTest();
      timer.stopTimer();

      // Plain '\n': stdout is a text-mode stream, which already expands '\n'
      // to CR LF on Windows, so writing "\r\n" would emit a stray extra CR.
      cout << "Found " << intersectionSize << " values in the intersection, in " << timer.getElapsedTime() << "s\n";
    }

    getchar();

    return 0;
}

（unordered_mapでも試したいところですが、私のバージョンにはありません）。あなたのC++環境の設定に何か問題があるのではないかと思います。

score 1 · Accepted Answer

なんとか原因を突き止めることができました。次を参照してください：

デバッガー/IDEを接続していると、STLコードの実行が非常に遅くなるのはなぜですか？

デバッガーを接続すると、別の（DEBUG）メモリヒープが使用されます。必要に応じてオフにすることができます。

score 0 · Accepted Answer

私自身は使ったことがありませんが、Google Sparsehashがぴったりかもしれません。

score 0 · Accepted Answer

C++コードでstd::mapを使用しています。これは、挿入時間とルックアップ時間がO（log（n））です。より良い比較を得るために、hash_mapでテストしてみてください。

score 0 · Accepted Answer

期待どおりの動作ではないようですが、実際に手助けする前に、もっと詳細を集める必要があります。誰のhash_map実装を使用していますか？プロファイラーにかけてみましたか？もしそうなら、どのような結果が出ましたか？

一般に、明らかな理由もなくハッシュテーブルの実装のパフォーマンスが低下している場合、通常は、テーブルが使用しているハッシュ関数が特定の入力に対してパフォーマンスが低下していることが原因です。それはあなたの問題かもしれません-C++hash_mapは、キーを小さな範囲のバケットにマップするハッシュ関数を使用しますが、C＃HashSetは使用しません-または、まったく異なるものである可能性があります。

std :: mapは通常、ツリーとして実装されるため、パフォーマンス特性が異なります。繰り返しますが、実装と入力データの詳細は重要です。

score 0 · Accepted Answer

あなたが本当に比較しているのは

O（1）であるC＃ハッシュセット。これは、ほぼ一定で、入力サイズに依存しないことを意味します。

対するC++のvectorは……つまり（入力のサイズ）×定数の時間がかかります。

これはほとんど実用的な意味がありません。

（2007年のTR1以降）C++でハッシュセットに相当するのはstd::tr1::unordered_set<...>（およびstd::tr1::unordered_map<...>）なので、こちらを使用してみてください。

TR1のウィキペディアリンク

また、このページによると、Visual Studioには独自の、最適とは言えないSTL TR1実装があることに注意してください。（個人的な経験はなく、ここで見つけた情報です）

c++ - C＃HashSetのような高速C++コンテナと辞書？

6 に答える 6

Related

Reference