2
4

5 に答える 5

20

erase/uniqueと C++11 ラムダを使用します。

#include <algorithm>
#include <iostream>
#include <string>

int main()
{
    std::string text("_hello___world__");

    text.erase(
        std::unique(
            text.begin(),
            text.end(),
            [](char a, char b){ return (a == b) && (a == '_'); }
        ),
        text.end()
    );

    std::cout << text << '\n';

    return 0;
}

ラムダを使用したくない場合は、次のようなファンクターを定義できます。

class both_equal_to
{
    char value;
public:
    both_equal_to(char ch) : value(ch) {}

    bool operator()(char first, char second) const
    {
        return (first == second) && (first == value);
    }
};

次に、ラムダを に置き換えboth_equal_to('_')ます。

を使用しているだけchar*で、構築のコストを払いたくない場合std::stringは、次のコードが RolandXu のコードに近くなります。

char *convert_underscores(char *str)
{
    if (str)
    {
        size_t length = strlen(str);
        char *end = std::unique(str, str + length, both_equal_to('_'));
        *end = '\0';
    }
    return str;
}
于 2012-06-01T01:42:49.013 に答える
3

この投稿では、このページで送信されたメソッドの速度を比較しています。40文字の文字列で関数を100万回実行し、各アルゴリズムにかかる時間を計測しました。

所要時間| 使用したアルゴリズム

0.2秒 | char *を使用し、charへのポインタをジャグリングするRolandXuのバージョン。

0.4秒 | 高炉のバージョンパート2、ファンクター、ストリングなし。

2.7秒 | ファンクターとストリングを備えた高炉のバージョンパート1。

8.7秒 | 検索__を実行している文字列をループしているEricLのバージョンは_に置き換えます。

11.0秒 | Eric Lのバージョンは、各文字をループして文字列を組み立てます。

11.8秒 | remove_copy_ifを使用したJerryCoffinのバージョン。

上記のベンチマークとタイミングを証明するC++コード:

#include <iostream>
#include <cstdio>
#include <ctime>
#include <cstring>
#include <algorithm>

using namespace std;
string convert_underscores_by_EricL_using_string_replace(string str){

    //Cons:
        //This is the very slowest algorithm because the complexity of this operation
        //is O(n^2) and possibly higher with the overhead of string conversions.
    //Pros:
        //This is the function is the most concise, needing only 3 lines of code.

    while(str.find("__") != string::npos){
        str = str.replace(str.find("__"), 2, "_");
    }
    return str;
}


string convert_underscores_EricL_loop_over_a_string_and_remove_repeats(string str){

    //CONS:
        //Using a string really slows things down. Algorithm is too slow.
        //Not the most concise solution, 8 lines.
        //Has lots of ugly conditionals, x, and x-1, confusing to look at.
    //PROS:
        //Fastest function of those tested.  

    int len = str.length();
    string result = "";
    if (len < 2) return str;
    result += str[0];
    for(int x = 1; x < len; x++){
        if (str[x] != str[x-1] || str[x] != '_')
            result += str[x];
    }
    return result;
}




class repeated_by_jerry_coffin {
char prev;
char val;
public:
    repeated_by_jerry_coffin(char ch) : val(ch), prev(0) {}
    bool operator()(char ch) {
        bool ret = prev == val && ch == val;
        prev = ch;
        return ret;
    }
};
string convert_underscores_jerry_coffins_with_remove_copy_if(string str){

    //CONS:
        //Algorithm is the 2nd slowest algorithm.
    //PROS:
        //Concise, intuitive, needing only 4 lines.
        //Offloads the heavy lifting to std builtin methods: std::remove_copy_if and std::back_inserter

    string result = "";
    std::remove_copy_if(str.begin(), str.end(),
        std::back_inserter(result),
        repeated_by_jerry_coffin('_'));

    return result;
}


char* convert_underscores_by_RolandXu_using_char_stars_and_pointers(char *src){

    //CONS:
        //You have to get your hands dirty with C++ pointers.
    //PROS:
        //Fastest function of those tested.  

    char* readIndex;
    char* writeIndex;
    if (src==NULL) return NULL;
    readIndex = writeIndex = src;

    while (*readIndex != '\0')
    {
        while(*readIndex !='_' && *readIndex != '\0')
            *writeIndex++ = *readIndex++;

        if (*readIndex != '\0')
            *writeIndex++ = *readIndex++;

        while (*readIndex == '_')
            readIndex++;
    }
    *writeIndex = '\0';
    return src;
}


class both_equal_to__blastfurnace_version1{
char value;
public:
    both_equal_to__blastfurnace_version1(char ch) : value(ch) {}
    bool operator()(char first, char second) const{
        return (first == second) && (first == value);
    }
};
string convert_underscores_blastfurnace_version1_with_functor(string str){

    //CONS:
        //You have to create an entirely new class that overloads an operator.  
        //The speed is harmed by the usage of string.
    //PROS:
        //Don't need to roll your own loops with pointers.

    str.erase(
        std::unique(
            str.begin(),
            str.end(),
            both_equal_to__blastfurnace_version1('_')
        ),
        str.end()
    );

    return str;
}




class both_equal_to_blastfurnace_version2{
char value;
public:
    both_equal_to_blastfurnace_version2(char ch) : value(ch) {}
    bool operator()(char first, char second) const{
        return (first == second) && (first == value);
    }
};
char *convert_underscores_blastfurnace_version2_std_unique_and_no_string(char *str){
    //CONS:
        //No problems.
    //PROS:
        //More concise/intuitive than the fastest version and nearly as fast.  Winner!

    if (str){
        size_t length = strlen(str);
        char *end = std::unique(str, str + length, both_equal_to_blastfurnace_version2('_'));
        *end = '\0';
    }
    return str;
}

void assertCharStarEquals(char* a, char* b, string msg){
    if (strcmp(a, b) == 0) cout<<"passed" << endl;
    else cout << "Failed" << msg << " should be: '" << a << "'  it returned: '" << b << "'" << endl;
}
void assertStringEquals(string a, string b, string msg){
    if (a == b) cout<<"passed" << endl;
    else cout << "Failed" << msg << " should be: '" << a << "'  it returned: '" << b << "'" << endl;
}

void test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(int numtests, string str){
    char mystr[str.length()];
    strcpy(mystr, str.c_str());

    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        char* s = convert_underscores_by_RolandXu_using_char_stars_and_pointers(mystr);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout <<  diff << "   RolandXu's version using char*. " << '\n';

}

void test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(int numtests, string str){

    char mystr[str.length()];
    strcpy(mystr, str.c_str());

    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        char* val = convert_underscores_blastfurnace_version2_std_unique_and_no_string(mystr);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout << diff << "   Blastfurnace's version part two, functor, without string. " <<endl;
}

void test03_convert_underscores_blastfurnace_version1_with_functor(int numtests, string str){

    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        string s = convert_underscores_blastfurnace_version1_with_functor(str);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout << diff << "   Blastfurnace's version part one with the functor and string. " <<endl;
}
void test04_convert_underscores_by_EricL_using_string_replace(int numtests, string str){

    char mystr[str.length()];
    strcpy(mystr, str.c_str());

    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        string s = convert_underscores_by_EricL_using_string_replace(mystr);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout<<  diff << "   Eric L's version looping over the string doing find double underscore replace with single underscore. " <<endl;
}

void test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(int numtests, string str){

    char mystr[str.length()];
    strcpy(mystr, str.c_str());

    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        string s = convert_underscores_EricL_loop_over_a_string_and_remove_repeats(mystr);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout <<  diff << "   Eric L's version looping over each char and assembling a string. "<< endl;
}


void test06_convert_underscores_jerry_coffins_with_remove_copy_if(int numtests, string str){
    clock_t start = clock();
    int x = 0;
    while(x < numtests){
        string s = convert_underscores_jerry_coffins_with_remove_copy_if(str);
        x++;
    }
    double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
    cout<<  diff << "   Jerry Coffin's version with remove_copy_if. " <<endl;
}

int main(){
    cout << "GO!\n";

    int numtests = 1000000;
    string test_string = "__aa_a_aaa_--__&___aa_234______3)_!___<_";
    cout << "Time | Algorithm Used.\n";

    test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(numtests, test_string);

    test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(numtests, test_string);

    test03_convert_underscores_blastfurnace_version1_with_functor(numtests, test_string);

    test04_convert_underscores_by_EricL_using_string_replace(numtests, test_string);

    test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(numtests, test_string);

    test06_convert_underscores_jerry_coffins_with_remove_copy_if(numtests, test_string);


    //Produces the following output:



    //Extra assertion testing to make sure everyone's algorithm is correct:
    char in[30];
    char out[30];
    strcpy(in, "a__");
    strcpy(out, "a_");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "01");
    strcpy(in, "a_");
    strcpy(out, "a_");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "02");
    strcpy(in, "_______");
    strcpy(out, "_");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "03");
    strcpy(in, "__a");
    strcpy(out, "_a");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "04");
    strcpy(in, "_hello___world__");
    strcpy(out, "_hello_world_");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "05");
    strcpy(in, "");
    strcpy(out, "");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "06");
    strcpy(in, "  __  ");
    strcpy(out, "  _  ");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "07");
    strcpy(in, "U+221E");
    strcpy(out, "U+221E");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "08");
    strcpy(in, "__\u00b2__");
    strcpy(out, "_\u00b2_");
    assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "09");


    cout<< "OK\n";


    strcpy(in, "a__");
    strcpy(out, "a_");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "01");
    strcpy(in, "a_");
    strcpy(out, "a_");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "02");
    strcpy(in, "_______");
    strcpy(out, "_");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "03");
    strcpy(in, "__a");
    strcpy(out, "_a");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "04");
    strcpy(in, "_hello___world__");
    strcpy(out, "_hello_world_");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "05");
    strcpy(in, "");
    strcpy(out, "");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "06");
    strcpy(in, "  __  ");
    strcpy(out, "  _  ");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "07");
    strcpy(in, "U+221E");
    strcpy(out, "U+221E");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "08");
    strcpy(in, "__\u00b2__");
    strcpy(out, "_\u00b2_");
    assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "09");



    cout<< "OK\n";


    string in_s = "a__";
    string out_s = "a_";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "01");
    in_s = "a_";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "02");
    in_s = "_______";
    out_s = "_";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "03");
    in_s = "__a";
    out_s = "_a";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "04");
    in_s = "_hello___world__";
    out_s = "_hello_world_";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "05");
    in_s = "";
    out_s = "";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "06");
    in_s = "  __  ";
    out_s = "  _  ";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "07");
    in_s = "U+221E";
    out_s = "U+221E";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "08");
    in_s = "__\u00b2__";
    out_s = "_\u00b2_";
    assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "09");



    cout<< "OK\n";


    in_s = "a__";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "01");
    in_s = "a_";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "02");
    in_s = "_______";
    out_s = "_";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "03");
    in_s = "__a";
    out_s = "_a";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "04");
    in_s = "_hello___world__";
    out_s = "_hello_world_";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "05");
    in_s = "";
    out_s = "";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "06");
    in_s = "  __  ";
    out_s = "  _  ";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "07");
    in_s = "U+221E";
    out_s = "U+221E";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "08");
    in_s = "__\u00b2__";
    out_s = "_\u00b2_";
    assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "09");



    cout<< "OK\n";


    in_s = "a__";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "01");
    in_s = "a_";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "02");
    in_s = "_______";
    out_s = "_";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "03");
    in_s = "__a";
    out_s = "_a";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "04");
    in_s = "_hello___world__";
    out_s = "_hello_world_";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "05");
    in_s = "";
    out_s = "";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "06");
    in_s = "  __  ";
    out_s = "  _  ";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "07");
    in_s = "U+221E";
    out_s = "U+221E";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "08");
    in_s = "__\u00b2__";
    out_s = "_\u00b2_";
    assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "09");




    cout<< "OK\n";


    in_s = "a__";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "01");
    in_s = "a_";
    out_s = "a_";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "02");
    in_s = "_______";
    out_s = "_";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "03");
    in_s = "__a";
    out_s = "_a";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "04");
    in_s = "_hello___world__";
    out_s = "_hello_world_";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "05");
    in_s = "";
    out_s = "";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "06");
    in_s = "  __  ";
    out_s = "  _  ";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "07");
    in_s = "U+221E";
    out_s = "U+221E";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "08");
    in_s = "__\u00b2__";
    out_s = "_\u00b2_";
    assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "09");





    return 0;
}

私たちは何を学びましたか?

  1. C ++文字列では、コンパイラが文字列のメモリを1つずつ忠実にステップ実行し、メモリ内の文字列の終わりを見つけてカウントするため、'str.length()'は非常に重いです。文字列を使用しないでください。また、str.length();を使用しないでください。

  2. str.end()を使用すると、項目1と同様にO(n)パフォーマンスヒットも呼び出されます。str.end()は使用しないでください。

  3. char配列でのstd::uniqueの使用は最適化されており、非常に高速です。文字列を連結したforループよりも1桁高速です。

  4. C ++では、mystring [x]を実行すると、メモリ内のそのスロットのメモリルックアップが発生します。これには時間がかかり、ポインタを使用してポインタに1を追加するよりもはるかに低速です。xで反復するループ内にmystring[x]を配置しないでください。

  5. 文字列を使用する必要がある場合は、mystring + =anotherstring[x]を入れないでください。文字列は、この行を実行するたびに、文字列全体を1つずつステップ実行する必要があります。

  6. 文字列を連結したり、メモリのチャンクを取得したり、ポインタを定義したり、ポインタに文字を配置したり、ポインタをインクリメントしたりしないでください。ループと文字列との連結は、O(n)の複雑さを引き起こします。

この日はたくさんの学びがあり、素晴らしい歌が歌われます。

于 2012-06-01T03:58:11.693 に答える
3

ライブラリなし:

#include <stdio.h>
char *RemoveUnderScore(char *src)
{
    char* readIndex;
    char* writeIndex;
    if (src==NULL) return NULL;
    readIndex = writeIndex = src;

    while (*readIndex != '\0')
    {
        while(*readIndex !='_' && *readIndex != '\0')
            *writeIndex++ = *readIndex++;

        if (*readIndex != '\0')
            *writeIndex++ = *readIndex++;

        while (*readIndex == '_')
            readIndex++;
    }

    *writeIndex = '\0';
    return src;
}

int main(int argc,char** argv){
    char str[] = "_hello___worl__d___";
    printf(RemoveUnderScore(str));
    return 0;
}
于 2012-06-01T01:47:15.157 に答える
2

私は次のようなものを使用すると思いますstd::remove_copy_if

char prev;

std::remove_copy_if(input.begin(), input.end(), 
    std::back_inserter(result), 
    [&prev] (char ch) ->bool { 
        bool ret=prev=='_' && ch == '_'; 
        prev=ch; 
        return ret;
});

または、C ++ 03で立ち往生している場合は、ラムダの代わりに明示的なファンクターを使用して同じことを行うことができます。

class repeated { 
    char prev;
    char val;
public:
    repeated(char ch) : val(ch), prev(0) {}

    bool operator()(char ch) { 
        bool ret = prev == val && ch == val; 
        prev = ch;
        return ret;
    }
};

std::remove_copy_if(input.begin(), input.end(), 
                    std::back_inserter(result), 
                    repeated('_'));

いずれにせよ、各文字を入力から出力に(最大で)1回だけコピーします。ここで、を使用std::string::replaceすると、左側に繰り返されるアンダースコアの数と同じ頻度で各文字がコピーされます。

編集:@blastfurnaceの答えを見ても、実際にこれを使用するかどうかはわかりません-std::uniqueおそらくこの仕事に適しています。

于 2012-06-01T02:18:42.517 に答える
2

In C++, what I gave, very inefficiently:

string convert_underscores(string str){
    int len = str.length();   
    //bad, str.length() incurs O(n) complexity.

    string result = "";
    //bad, don't use string.  string makes everything slow.

    if (len < 2) return str;  
    result += str[0];
    //bad bad bad, string concatenation incurs O(n) complexity.

    for(int x = 1; x < len; x++){  
        //This for loop incurs unnecessary management overhead.

        if (str[x] != str[x-1] || str[x] != '_'){
            result += str[x];  
            //concatenation extremely evil here: costs O(n)
            //A lookup on str[x] costs time.  
        }
    }

    return result;  
    //returning a string bad, the entire memory is moved 
    //instead of just a pointer to the first slot.
}

or even less efficient with str.replace.

string convert_underscores(string str){
    while(str.find("__") != string::npos){ 
        str = str.replace(str.find("__"), 2, "_");
    }
    return str;
}
//find incurs unnecessary O(n) complexity. string copy incurs 
//a full memory movement on contents of string.  
//n passes are required over the string.  
于 2012-06-01T01:19:10.880 に答える