optimization - for ループ内のベクトル要素へのアクセス、いくつかの方法の比較

Question

ベクトルに格納されたデータにアクセスするいくつかの方法をチェックするために、小さなベンチマークプログラムを作成しました。次の方法を比較するために、2 つのネストされた for ループを使用しました。A) 最初のループの反復ごとにベクトルのオブジェクトへの参照を作成し、その参照を 2 番目の for ループで使用する B) 最初のループの反復ごとにベクトルのオブジェクトへのポインターを取得し、そのポインターを 2 番目の for ループで使用する C) [] 演算子を使用してベクトルオブジェクトに直接アクセスする D) 最初のループで auto& を使用する

このようなネストされた for ループのペアは、それ自体がタイミング関数と共に別のループにネストされていました。各 for ループの範囲を 0 から 100 に設定してテストを数回実行したところ、これらすべての方法で同じタイミング結果が得られました。常に約 0.150 秒で、変動は 0.02% でした。

私の質問は次のとおりです。

1) 私のテストは正しいですか?

2）私が見逃した最適化/別のアプローチはありますか？

これが私のコードです

#include <chrono>
#include <cmath>
#include <ctime>
#include <iostream>
#include <ratio>
#include <vector>
using namespace std;
using namespace std::chrono;


/// Benchmark payload: owns a vector of floats that work() squares in place.
struct my_struct{

    /// Samples generated by the constructor and updated by work().
    std::vector<float> data;

    /// Fills data with the N values cos(x + i) for i = 0 .. N-1.
    /// @param N number of samples to generate; a value <= 0 leaves data empty.
    /// @param x offset added to the sample index before taking the cosine.
    my_struct(int N, float x){
        if(N>0){
            // Allocate once up front instead of growing on every push_back.
            // Guarded because a negative N cast to an unsigned size would be huge.
            data.reserve(static_cast<std::vector<float>::size_type>(N));
        }
        for(int i=0;i<N;i++){
            data.push_back(std::cos(x+static_cast<float>(i)));
        }
    }

    /// Replaces every element of data with its square.
    void work(){
        // Range-for avoids comparing a signed index against the unsigned size().
        for(float& value : data){
            value=value*value;
        }
    }

};

int main(){
int N=100;
vector<my_struct> stuff;
for(int k=0; k<N; ++k){
    stuff.push_back( my_struct(100,sin(k)) );
}

vector<duration<double>> results_t1,results_t2,results_t3,results_t4;
high_resolution_clock::time_point t1 = high_resolution_clock::now();
for(int k=0; k<N; ++k){
    int which=0; //this is used to choose what method of access will be used
    switch(which){
        case 0:{ //pointer
            my_struct * thing=NULL;
            for( int i=0; i<N;++i){
                high_resolution_clock::time_point t2 = high_resolution_clock::now();
                for( int j=0; j<N;++j){
                    thing =&stuff[j];
                    for( int jj=0; jj<N;++jj)
                        thing->work();
                }
                duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
                results_t1.push_back(time_span);
                t1=t2;
            }
            break;
        }

        case 1:{    //direct access
            for( int i=0; i<N;++i){
                high_resolution_clock::time_point t2 = high_resolution_clock::now();
                for( int j=0; j<N;++j){
                    for( int jj=0; jj<N;++jj)
                        stuff[j].work();
                }
                duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
                results_t2.push_back(time_span);
                t1=t2;
            }
            break;
        }

        case 2:{    //auto reference
            for( int i=0; i<N;++i){
                high_resolution_clock::time_point t2 = high_resolution_clock::now();
                for( auto& temp : stuff){
                    for( int jj=0; jj<N;++jj)
                        temp.work();
                }
                duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
                results_t3.push_back(time_span);
                t1=t2;
            }
            break;
        }
        case 3:{    //reference
            for( int i=0; i<N;++i){
                high_resolution_clock::time_point t2 = high_resolution_clock::now();
                for( int j=0; j<N;++j){
                    my_struct & temp =stuff[j];
                    for( int jj=0; jj<N;++jj)
                        temp.work();
                }
                duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
                results_t4.push_back(time_span);
                t1=t2;
            }
            break;
        }

    }
}
double temp=0;
for(auto& t : results_t1){
temp+=t.count();
}
temp=temp/N;
std::cout << "pointer " << temp << " seconds.";
std::cout << std::endl;

temp=0.0;
for(auto& t : results_t2){
temp+=t.count();
}
temp=temp/N;
std::cout << "direct " << temp << " seconds.";
std::cout << std::endl;

temp=0.0;
for(auto& t : results_t3){
temp+=t.count();
}
temp=temp/N;
std::cout << "auto reference " << temp << " seconds.";
std::cout << std::endl;

    temp=0.0;
for(auto& t : results_t4){
temp+=t.count();
}
temp=temp/N;
std::cout << "reference " << temp << " seconds.";
std::cout << std::endl;

cin.get();

return 0;
}

optimization - for ループ内のベクトル要素へのアクセス、いくつかの方法の比較

1 に答える 1

Related

Reference