c++ - セットの高速交差: C++ と C#

Question

Visual Studio 2008 SP1 を搭載した Vista x64 Business を実行しているマシン (クアッドコア、8 GB RAM) で、2 つの数値セットを非常に迅速に交差させようとしています。

C++ で 2 つのアプローチを実装し、C# で 1 つ実装しました。これまでのところ、C# のアプローチの方が高速です。C++ のアプローチを改善して、C# よりも高速にできるようにしたいと考えています。

C# の出力は次のとおりです: (リリースビルド)

Found the intersection 1000 times, in 4741.407 ms

2 つの異なるアプローチ (Release x64 build) の初期 C++ 出力を次に示します。

Found the intersection (using unordered_map) 1000 times, in 21580.7ms
Found the intersection (using set_intersection) 1000 times, in 22366.6ms

3 つのアプローチ (x64 ビルドのリリース) の最新の C++ 出力を次に示します。

最新のベンチマーク:

Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6ms
Found the intersection of 495 values (using set_intersection) 1000 times, in 9817.69ms
Found the intersection of 504 values (using unordered_set) 1000 times, in 24769.1ms

そのため、set_intersection アプローチは C# よりも約 2 倍遅くなりますが、最初の C++ アプローチよりも 2 倍速くなります。

最新の C++ コード:

Code:

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>
#include <unordered_set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;
using namespace tr1;


int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_set<int> theSet;      

     theSet.insert( set1.begin(), set1.end() );

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        if ( theSet.find(*iterator) != theSet.end() )
        {
                intersectionSize++;
        }
    }

    return intersectionSize;
}

int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;  

    vector<int>::const_iterator set1_end = set1.end();

    // Now intersect the two sets by populating the map
    for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted)
{   
    // Create two vectors
    std::vector<int> set1(set1_unsorted.size());
    std::vector<int> set2(set2_unsorted.size());

    // Copy the unsorted data into them
    std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
    std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());

    // Sort the data
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    vector<int> intersection;
    intersection.reserve(1000);

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));

    return intersection.size(); 
}

void createSets( vector<int>& set1, vector<int>& set2 )
{
    srand ( time(NULL) );

    set1.reserve(100000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Try to get half of our values intersecting
    float ratio = 200000.0f / RAND_MAX;


    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() * ratio + 1;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    // Make sure set1 is in random order (not sorted)
    random_shuffle(set1.begin(),set1.end());
}

int _tmain(int argc, _TCHAR* argv[])
{
    int intersectionSize = 0;

    vector<int> set1, set2; 
    createSets( set1, set2 );

    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest2(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

C# コード:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace DictionaryPerformance
{
    class Program
    {
        static void Main(string[] args)
        {
            List<int> set1 = new List<int>(100000);
            List<int> set2 = new List<int>(1000);

            // Create 100,000 values for set1
            for (int i = 0; i < 100000; i++)
            {
                int value = 1000000000 + i;
                set1.Add(value);
            }

            Random random = new Random(DateTime.Now.Millisecond);

            // Create 1,000 values for set2
            for (int i = 0; i < 1000; i++)
            {
                int value = 1000000000 + (random.Next() % 200000 + 1);
                set2.Add(value);
            }

            long start = System.Diagnostics.Stopwatch.GetTimestamp();
            for (int i = 0; i < 1000; i++)
            {
                runIntersectionTest(set1,set2);
            }
            long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start;

            Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency));

            Console.ReadKey();
        }

        static int runIntersectionTest(List<int> set1, List<int> set2)
        {

            Dictionary<int,int> theMap = new Dictionary<int,int>(100000);

            // Now intersect the two sets by populating the map
            foreach( int value in set1 )
            {
                theMap[value] = 1;
            }

            int intersectionSize = 0;

            foreach ( int value in set2 )
            {
                int count;
                if ( theMap.TryGetValue(value, out count ) )
                {
                    theMap[value] = 2;
                    intersectionSize++;
                }
            }

            return intersectionSize;
        }
    }
}

C++ コード:

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(vector<int> set1, vector<int> set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;

    // Now intersect the two sets by populating the map
    for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(set<int> set1, set<int> set2)
{   
    set<int> intersection;

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));

    return intersection.size(); 
}



int _tmain(int argc, _TCHAR* argv[])
{
    srand ( time(NULL) );

    vector<int> set1;
    vector<int> set2;

    set1.reserve(10000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() % 200000 + 1;
        random *= 10;

        int value = 1000000000 + random;
        set2.push_back(value);
    }


    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    set<int> set21;
    set<int> set22;

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set21.insert(value);
    }

    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() % 200000 + 1;
        random *= 10;

        int value = 1000000000 + random;
        set22.insert(value);
    }

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        runSetIntersection(set21,set22);
    }
    timer.Stop();

    cout << "Found the intersection (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

わかりました、これが最新のものです。いくつかの変更があります。

C++ セットが適切にセットアップされ、50% の共通部分が作成されました (C# のように)。
Set1 はシャッフルされているためソートされず、set2 はすでにソートされていません
set_intersection の実装はベクトルを使用し、それらを最初にソートするようになりました

C++ (リリース、x64) 結果:

Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1ms
Found the intersection of 494 values (using set_intersection) 1000 times, in 10317ms

そのため、C# よりも 2 倍遅くなります。@Jalf: かなり高速な数値を取得していますが、ここで間違っていることはありますか?

C++ コード:

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;  

    vector<int>::const_iterator set1_end = set1.end();

    // Now intersect the two sets by populating the map
    for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(const vector<int> set1_unsorted, const vector<int> set2_unsorted)
{   
    // Create two vectors
    std::vector<int> set1(set1_unsorted.size());
    std::vector<int> set2(set2_unsorted.size());

    // Copy the unsorted data into them
    std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin());
    std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin());

    // Sort the data
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    vector<int> intersection;
    intersection.reserve(1000);

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));

    return intersection.size(); 
}

void createSets( vector<int>& set1, vector<int>& set2 )
{
    srand ( time(NULL) );

    set1.reserve(100000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Try to get half of our values intersecting
    float ratio = 200000.0f / RAND_MAX;


    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() * ratio + 1;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    // Make sure set1 is in random order (not sorted)
    random_shuffle(set1.begin(),set1.end());
}

int _tmain(int argc, _TCHAR* argv[])
{
    int intersectionSize = 0;

    vector<int> set1, set2; 
    createSets( set1, set2 );

    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

score 28 · Accepted Answer

テストにはいくつかの問題があります。

まず、集合交差をテストしていませんが、「いくつかの配列を作成し、それらに乱数を入力してから、集合交差を実行します」。実際に関心のあるコードの部分だけを測定する必要があります。これらのことを実行したい場合でも、ここでベンチマークするべきではありません。不確実性を減らすために、一度に 1 つのことを測定します。C++ 実装のパフォーマンスを向上させたい場合は、まず、その実装のどの部分が予想よりも遅いかを知る必要があります。つまり、設定コードを交差テストから分離する必要があります。

次に、キャッシュの影響やその他の不確実性を考慮して、テストを何度も実行する必要があります。(そして、おそらく、1000 回の実行ごとに個別の時間を出力するのではなく、1 つの合計時間を出力します。このようにして、0 ～ 20 ミリ秒の範囲で使用すると、分解能が制限され、不正確な結果を報告する可能性のあるタイマーからの不確実性を減らします。

さらに、ドキュメントから読み取ることができる限り、set_intersection への入力はソートする必要がありますが、set2 はソートされません。unordered_mapを使用する理由がないように思われますunordered_set。

必要な設定コードについては、交差を実行するためにベクトルを入力する必要はないことに注意してください。独自の実装とset_intersectionイテレータでの作業の両方が既に行われているため、入力が既に存在するデータ構造にイテレータのペアを渡すだけで済みます。

コードに関するいくつかの具体的なコメント:

++iteratorの代わりに使用iterator++
各ループ反復で vector.end() を呼び出すのではなく、一度呼び出して結果をキャッシュします
並べ替えられたベクトルと std::set とunordered_set(not unordered_map)を使用して実験してください

編集：

私はあなたの C# バージョンを試していないので、数値を適切に比較することはできませんが、これが私の変更されたテストです。4GB RAMを搭載したCore 2 Quad 2.5GHzで、それぞれが1000回実行されます。

std::set_intersection on std::set: 2606ms
std::set_intersection on tr1::unordered_set: 1014ms
std::set_intersection on sorted vectors: 171ms
std::set_intersection on unsorted vectors: 10140ms

最後のものは、ベクターのコピーと並べ替えの両方を行う必要があるため、少し不公平です。理想的には、並べ替えのみをベンチマークの一部にする必要があります。1000 個の並べ替えられていないベクトルの配列を使用するバージョンを作成しようとしましたが (そのため、反復ごとに並べ替えられていないデータをコピーする必要はありません)、パフォーマンスはほぼ同じか、少し悪化しました。だったので、このバージョンに戻しました

そして私のコード：

#define _SECURE_SCL 0

#include <ctime>
#include <vector>
#include <set>
#include <iostream>
#include <algorithm>
#include <unordered_set>
#include <windows.h>

template <typename T, typename OutIter>
void stl_intersect(const T& set1, const T& set2, OutIter out){
    std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);
}

template <typename T, typename OutIter>
void sort_stl_intersect(T& set1, T& set2, OutIter out){
    std::sort(set1.begin(), set1.end());
    std::sort(set2.begin(), set2.end());
    std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);
}


template <typename T>
void init_sorted_vec(T first, T last){
    for ( T cur = first; cur != last; ++cur)
    {
        int i = cur - first;
        int value = 1000000000 + i;
        *cur = value;
    }
}

template <typename T>
void init_unsorted_vec(T first, T last){
    for ( T cur = first; cur != last; ++cur)
    {
        int i = rand() % 200000 + 1;
        i *= 10;

        int value = 1000000000 + i;
        *cur = value;
    }
}

struct resize_and_shuffle {
    resize_and_shuffle(int size) : size(size) {}

    void operator()(std::vector<int>& vec){
        vec.resize(size);

    }
    int size;
};

int main()
{
    srand ( time(NULL) );
    std::vector<int> out(100000);

    std::vector<int> sortedvec1(100000);
    std::vector<int> sortedvec2(1000);

    init_sorted_vec(sortedvec1.begin(), sortedvec1.end());
    init_unsorted_vec(sortedvec2.begin(), sortedvec2.end());
    std::sort(sortedvec2.begin(), sortedvec2.end());

    std::vector<int> unsortedvec1(sortedvec1.begin(), sortedvec1.end());
    std::vector<int> unsortedvec2(sortedvec2.begin(), sortedvec2.end());

    std::random_shuffle(unsortedvec1.begin(), unsortedvec1.end());
    std::random_shuffle(unsortedvec2.begin(), unsortedvec2.end());

    std::vector<int> vecs1[1000];
    std::vector<int> vecs2[1000];

    std::fill(vecs1, vecs1 + 1000, unsortedvec1);
    std::fill(vecs2, vecs2 + 1000, unsortedvec2);

    std::set<int> set1(sortedvec1.begin(), sortedvec1.end());
    std::set<int> set2(sortedvec2.begin(), sortedvec2.end());

    std::tr1::unordered_set<int> uset1(sortedvec1.begin(), sortedvec1.end());
    std::tr1::unordered_set<int> uset2(sortedvec2.begin(), sortedvec2.end());

    DWORD start, stop;
    DWORD delta[4];

    start = GetTickCount();
    for (int i = 0; i < 1000; ++i){
        stl_intersect(set1, set2, out.begin());
    }
    stop = GetTickCount();
    delta[0] = stop - start;

    start = GetTickCount();
    for (int i = 0; i < 1000; ++i){
        stl_intersect(uset1, uset2, out.begin());
    }
    stop = GetTickCount();
    delta[1] = stop - start;

    start = GetTickCount();
    for (int i = 0; i < 1000; ++i){
        stl_intersect(sortedvec1, sortedvec2, out.begin());
    }
    stop = GetTickCount();
    delta[2] = stop - start;

    start = GetTickCount();
    for (int i = 0; i < 1000; ++i){
        sort_stl_intersect(vecs1[i], vecs1[i], out.begin());
    }
    stop = GetTickCount();
    delta[3] = stop - start;

    std::cout << "std::set_intersection on std::set: " << delta[0] << "ms\n";
    std::cout << "std::set_intersection on tr1::unordered_set: " << delta[1] << "ms\n";
    std::cout << "std::set_intersection on sorted vectors: " << delta[2] << "ms\n";
    std::cout << "std::set_intersection on unsorted vectors: " << delta[3] << "ms\n";


    return 0;
}

C++ が常に C# より高速であるべき理由はありません。C# には、C++ と競合するには細心の注意が必要な重要な利点がいくつかあります。私が考えることができる主なものは、.NET ランドでは動的割り当てが途方もなく安いということです。C++ ベクトル、セット、または unordered_set (またはその他のコンテナー) をサイズ変更または拡張する必要があるたびに、非常にコストのかかるmalloc操作になります。.NET では、ヒープ割り当てはポインターにオフセットを追加するだけです。

したがって、C++ バージョンを競合させたい場合は、おそらくそれを解決して、実際のヒープ割り当てを実行しなくてもコンテナーのサイズを変更できるようにする必要があります。これには、おそらくコンテナーのカスタムアロケーターを使用します (おそらく、boost::pool が適している可能性があります)。賭けるか、自分で転がしてみてください)

もう 1 つの問題は、set_difference並べ替えられた入力でのみ機能することです。並べ替えを伴うテスト結果を再現するには、各反復で並べ替えられていないデータの新しいコピーを作成する必要があり、これにはコストがかかります (ただし、カスタムアロケーターを使用すると、多く）。入力がどのような形式かはわかりませんが、入力をコピーせずに直接並べ替えて、それをset_difference直接実行できる可能性があります。(入力が少なくとも配列または STL コンテナーである場合、これは簡単に実行できます。)

STL の主な利点の 1 つは、非常に柔軟で、ほとんどすべての入力シーケンスで機能することです。C# では、ほとんどの場合、入力を List や Dictionary などにコピーする必要がありますが、C++ では、そのままの入力を実行して回避できる場合がありstd::sortますset_intersection。

もちろん最後に、プロファイラーでコードを実行してみて、どこで時間が費やされているかを正確に確認してください。代わりに GCC を介してコードを実行することもできます。MSVC での STL のパフォーマンスは少し風変わりな場合があるというのが私の印象です。同様のタイミングが得られるかどうかを確認するためだけに、別のコンパイラでテストする価値があるかもしれません。

最後に、C++ と C# のパフォーマンスに関連する次のブログ投稿を見つけることができます: http://blogs.msdn.com/ricom/archive/2005/05/10/416151.aspx

それらの士気は、基本的にはい、C++でより良いパフォーマンスを得ることができるということですが、それは驚くべき量の作業です.

score 9 · Accepted Answer

すぐにわかる問題の 1 つは、C++ でセットを const 参照ではなく値で渡していることです。だから、あなたはそれらを渡すたびにそれらをコピーしています!

また、のターゲットにはセットを使用しませんset_intersection。私は次のようなものを使用します

int runSetIntersection(const set<int>& set1, const set<int>& set2)
{   
    vector<int> intersection;
    intersection.reserve(10000) // or whatever the max is

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));

    return intersection.size(); 
}

ただし、このコードは関数内で割り当てを行います。さらに速いでしょう

int runSetIntersection(const set<int>& set1, const set<int>& set2, vector<int>& scratch)
{   
    scratch.reserve(10000) // or whatever the max is

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(scratch));

    return scratch.size(); 
}

そして、タイマーを開始する前にスクラッチを割り当てます。

ただし、サイズだけを探している場合は、手書きの for ループを set::find と組み合わせると、さらに良い結果が得られる可能性があります。

score 4 · Accepted Answer

これを使って...

vector<int> set1(10000);
vector<int> set2(1000);

... ゼロ以外の初期サイズのベクトルを取得します。次に、push_back を使用せず、値を直接更新します。

score 2 · Accepted Answer

ところで、ソートされたセットが大きい場合、 std::set_intersection は最速のアルゴリズムではありません。std::set_intersection は最大 2*(m+n)-1 の比較を行いますが、Baeza-Yates のアルゴリズムのようなアルゴリズムの方が高速です。m が小さい場合、Baeza-Yates は O(m * log(n)) ですが、n = alpha * m の場合は O(n) です。基本的な考え方は、一種の双方向二分探索を行うことです。

http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.7899&rep=rep1&type=pdf

ソートされたシーケンスの高速交差アルゴリズムの実験的分析 Ricardo Baeza-Yates と Alejandro Salinger

また

R.バエザ・イェーツ。並べ替えられたシーケンスの高速集合交差アルゴリズム。組み合わせパターンマッチングに関する第 15 回年次シンポジウムの議事録 (CPM 2004)、Springer LNCS 3109、pp 400-408、イスタンブール、トルコ、2004 年 7 月。

以下は Erik Frey による説明と実装で、バイナリプローブを使用した std::set_intersection よりも大幅に高速な結果を示しています。私はまだ彼のコードを試していません。
http://fawx.com/

小さい方のセットで中央値の要素 A を選択します。
より大きなセットでその挿入位置要素 B を検索します。
A と B が等しい場合、要素を結果に追加します。
要素 A と B の両側にある空でないサブセットに対して、手順 1 ～ 4 を繰り返します。

;



/*
* baeza_intersect
*/
template< template  class Probe,
  class RandomAccessIterator, class OutputIterator>
void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1,
                     RandomAccessIterator begin2, RandomAccessIterator end2,
                     OutputIterator out)
{
  RandomAccessIterator probe1, probe2;

if ( (end1 - begin1) < ( end2 - begin2 ) )
  {
    if ( begin1 == end1 )
      return;
    probe1 = begin1 + ( ( end1 - begin1 ) >> 1 );
    probe2 = lower_bound< Probe >( begin2, end2, *probe1 );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left
    if (! (probe2 == end2 || *probe1 < *probe2 ))
      *out++ = *probe2++;
    baeza_intersect< Probe >(++probe1, end1, probe2, end2, out); // intersect right
  }
  else
  {
    if ( begin2 == end2 )
      return;
    probe2 = begin2 + ( ( end2 - begin2 ) >> 1 );
    probe1 = lower_bound< Probe >( begin1, end1, *probe2 );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left
    if (! (probe1 == end1 || *probe2 < *probe1 ))
      *out++ = *probe1++;
    baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out); // intersect right
  }
}

/*
* with a comparator
*/
 template< template  class Probe,
      class RandomAccessIterator, class OutputIterator, class Comparator >
    void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1,
                         RandomAccessIterator begin2, RandomAccessIterator end2,
                         OutputIterator out, Comparator cmp)
    {
      RandomAccessIterator probe1, probe2;

  if ( (end1 - begin1) < ( end2 - begin2 ) )
  {
    if ( begin1 == end1 )
      return;
    probe1 = begin1 + ( ( end1 - begin1 ) >> 1 );
    probe2 = lower_bound< Probe >( begin2, end2, *probe1, cmp );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
    if (! (probe2 == end2 || cmp( *probe1, *probe2 ) ))
      *out++ = *probe2++;
    baeza_intersect< Probe >(++probe1, end1, probe2, end2, out, cmp); // intersect right
  }
  else
  {
    if ( begin2 == end2 )
      return;
    probe2 = begin2 + ( ( end2 - begin2 ) >> 1 );
    probe1 = lower_bound< Probe >( begin1, end1, *probe2, cmp );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
    if (! (probe1 == end1 || cmp( *probe2, *probe1 ) ))
      *out++ = *probe1++;
    baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out, cmp); // intersect right
  }
}


// probe.hpp

/**
* binary probe: pick the next element by choosing the halfway point between low and high
*/
template< class RandomAccessIterator, class T >
struct binary_probe
{
  RandomAccessIterator operator()(RandomAccessIterator begin, RandomAccessIterator end, const T & value)
  {
    return begin + ( (end - begin) >> 1);
  }
};

/**
* lower_bound: like stl's lower_bound but with different kinds of probing
* note the appearance of the rare template parameter template!
*/
template< template  class Probe, class RandomAccessIterator, class T >
RandomAccessIterator lower_bound(RandomAccessIterator begin, RandomAccessIterator end, const T & value)
{
  RandomAccessIterator pit;
  Probe< RandomAccessIterator, T > pfunc; // probe-functor (wants to get func'd up)

while ( begin < end )
  {
    pit = pfunc(begin, end, value);
    if ( *pit < value )
      begin = pit + 1;
    else
      end = pit;
  }
  return begin;
}

/*
* this time with a comparator!
*/
template< template  class Probe, class RandomAccessIterator, class T, class Comparator >
RandomAccessIterator lower_bound(RandomAccessIterator begin, RandomAccessIterator end, const T & value, Comparator cmp)
{
  RandomAccessIterator pit;
  Probe< RandomAccessIterator, T > pfunc;

while ( begin < end )
  {
    pit = pfunc(begin, end, value);
    if ( cmp(*pit, value) )
      begin = pit + 1;
    else
      end = pit;
  }
  return begin;
}

score 2 · Accepted Answer

C++ の "runIntersectionTest" を変更して、各呼び出しでコンテナーをコピー構築するのではなく、コンテナーへの const 参照を取るようにします。(C# コードは ref を使用します。)

score 1 · Accepted Answer

Visual Studio を使用しているため、1 に設定されているかどうかを確認する必要があります_SECURE_SCL(通常、明示的に設定していない場合は 1 になります)。これが設定されている場合、リリースビルドであっても、すべての STL コードが範囲チェックされます。通常、コードの速度が 10 ～ 15% 低下します。

Microsoft は、範囲チェックが必要な場合、たとえば std::vector に既にインターフェイスがあることを認識していなかったようです: std::vector::at()!

（すみません、私の胸からそれを取り除かなければなりませんでした）。

とにかく、主な非効率性は、コンテナーを値で渡すのではなく、コピーしていることです。リンゴとバナナではなく、リンゴとリンゴを比較する (しようとする) 参照を使用します。

score 0 · Accepted Answer

0

C ++最適化フラグがオンになっていますか？

于 2009-06-29T22:31:33.650 に答える

score 0 · Accepted Answer

さて、多くのフィードバックの後、私は元の質問を何度も更新しました：

テストはそれぞれ1,000回実行されます
C＃コードは、より高解像度のタイマーを使用するようになりました
テストの前にデータ構造が入力されるようになりました

これまでのところ、C＃はC++よりも約5倍高速です。

あなたのアイデア/提案をみんなに感謝します。

score 0 · Accepted Answer

あなたはまだベクトルを値で渡しています。それらもコピーしていなければ問題ありません。

インサーターは、ベクトルの最後に値を入れていませんでした。配列の先頭 ( end が指していた場所) に値を挿入した後、最初の挿入でのみそれを行いました。

値を更新したときに、ハッシュマップバージョンで値を 2 回検索します。この値のイベントが更新されるのはなぜですか?

このコードを実行して、タイミングを投稿してください。

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_set.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(const vector<int>& set1, const vector<int>& set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_set<int> theSet;      

     theSet.insert( set1.begin(), set2.end() );

    int intersectionSize = 0;

    vector<int>::const_iterator set2_end = set2.end();

    for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator )
    {
        if ( theSet.find(*iterator) != theSet.end() )
        {
                intersectionSize++;
        }
    }

    return intersectionSize;
}

int runSetIntersection( vector<int> set1, vector<int> set2)
{   
    // Sort the data
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    vector<int> intersection;
    intersection.reserve(1000);

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection));

    return intersection.size(); 
}

void createSets( vector<int>& set1, vector<int>& set2 )
{
    srand ( time(NULL) );

    set1.reserve(100000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Try to get half of our values intersecting
    float ratio = 200000.0f / RAND_MAX;


    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() * ratio + 1;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    // Make sure set1 is in random order (not sorted)
    random_shuffle(set1.begin(),set1.end());
}

int _tmain(int argc, _TCHAR* argv[])
{
    int intersectionSize = 0;

    vector<int> set1, set2;     
    createSets( set1, set2 );

    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

score 0 · Accepted Answer

アップデート：

ベクトルを使用するように set_intersection コードを変更し、(並べ替えられたセットクラスを使用する代わりに) それらを並べ替えました。

Found the intersection of 319 values (using unordered_map) 1000 times, in 22187.5ms
Found the intersection of 315 values (using set_intersection) 1000 times, in 2401.62ms

注意: 大きなセットはソートされて作成されるため、この例ではソートにそれほど時間がかからない可能性があります。

C++ コード:

// MapPerformance.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <hash_map>
#include <vector>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <set>

#include <boost\unordered\unordered_map.hpp>

#include "timer.h"

using namespace std;
using namespace stdext;
using namespace boost;

int runIntersectionTest(vector<int> set1, vector<int> set2)
{
    // hash_map<int,int> theMap;
    // map<int,int> theMap;
    unordered_map<int,int> theMap;

    // Now intersect the two sets by populating the map
    for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ )
    {
        int value = *iterator;

        theMap[value] = 1;
    }

    int intersectionSize = 0;

    for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ )
    {
        int value = *iterator;

        unordered_map<int,int>::iterator foundValue = theMap.find(value);

        if ( foundValue != theMap.end() )
        {
            theMap[value] = 2;

            intersectionSize++;
        }
    }

    return intersectionSize;

}

int runSetIntersection(vector<int> set1, vector<int> set2)
{   
    sort(set1.begin(),set1.end());
    sort(set2.begin(),set2.end());

    set<int> intersection;

    set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end()));

    return intersection.size(); 
}



int _tmain(int argc, _TCHAR* argv[])
{
    srand ( time(NULL) );

    vector<int> set1;
    vector<int> set2;

    set1.reserve(10000);
    set2.reserve(1000);

    // Create 100,000 values for set1
    for ( int i = 0; i < 100000; i++ )
    {
        int value = 1000000000 + i;
        set1.push_back(value);
    }

    // Create 1,000 values for set2
    for ( int i = 0; i < 1000; i++ )
    {
        int random = rand() % 200000 + 1;
        random *= 10;

        int value = 1000000000 + random;
        set2.push_back(value);
    }

    int intersectionSize = 0;


    Timer timer;
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runIntersectionTest(set1, set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    timer.Reset();
    for ( int i = 0; i < 1000; i++ )
    {
        intersectionSize = runSetIntersection(set1,set2);
    }
    timer.Stop();

    cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl;

    getchar();

    return 0;
}

score 0 · Accepted Answer

あなたのソリューションが正常に機能していることは知っていますが、STL 実装を使用してみましたか:

すでにプラットフォーム用に最適化されている可能性があるので、試してみます

c++ - セットの高速交差: C++ と C#

13 に答える 13

Related

Reference