5 に答える
erase/unique
と C++11 ラムダを使用します。
#include <algorithm>
#include <iostream>
#include <string>
int main()
{
std::string text("_hello___world__");
text.erase(
std::unique(
text.begin(),
text.end(),
[](char a, char b){ return (a == b) && (a == '_'); }
),
text.end()
);
std::cout << text << '\n';
return 0;
}
ラムダを使用したくない場合は、次のようなファンクターを定義できます。
class both_equal_to
{
char value;
public:
both_equal_to(char ch) : value(ch) {}
bool operator()(char first, char second) const
{
return (first == second) && (first == value);
}
};
次に、ラムダを に置き換えboth_equal_to('_')
ます。
を使用しているだけchar*
で、構築のコストを払いたくない場合std::string
は、次のコードが RolandXu のコードに近くなります。
char *convert_underscores(char *str)
{
if (str)
{
size_t length = strlen(str);
char *end = std::unique(str, str + length, both_equal_to('_'));
*end = '\0';
}
return str;
}
この投稿では、このページで送信されたメソッドの速度を比較しています。40文字の文字列で関数を100万回実行し、各アルゴリズムにかかる時間を計測しました。
所要時間| 使用したアルゴリズム
0.2秒 | char *を使用し、charへのポインタをジャグリングするRolandXuのバージョン。
0.4秒 | 高炉のバージョンパート2、ファンクター、ストリングなし。
2.7秒 | ファンクターとストリングを備えた高炉のバージョンパート1。
8.7秒 | 検索__を実行している文字列をループしているEricLのバージョンは_に置き換えます。
11.0秒 | Eric Lのバージョンは、各文字をループして文字列を組み立てます。
11.8秒 | remove_copy_ifを使用したJerryCoffinのバージョン。
上記のベンチマークとタイミングを証明するC++コード:
#include <iostream>
#include <cstdio>
#include <ctime>
#include <cstring>
#include <algorithm>
using namespace std;
string convert_underscores_by_EricL_using_string_replace(string str){
//Cons:
//This is the very slowest algorithm because the complexity of this operation
//is O(n^2) and possibly higher with the overhead of string conversions.
//Pros:
//This is the function is the most concise, needing only 3 lines of code.
while(str.find("__") != string::npos){
str = str.replace(str.find("__"), 2, "_");
}
return str;
}
string convert_underscores_EricL_loop_over_a_string_and_remove_repeats(string str){
//CONS:
//Using a string really slows things down. Algorithm is too slow.
//Not the most concise solution, 8 lines.
//Has lots of ugly conditionals, x, and x-1, confusing to look at.
//PROS:
//Fastest function of those tested.
int len = str.length();
string result = "";
if (len < 2) return str;
result += str[0];
for(int x = 1; x < len; x++){
if (str[x] != str[x-1] || str[x] != '_')
result += str[x];
}
return result;
}
class repeated_by_jerry_coffin {
char prev;
char val;
public:
repeated_by_jerry_coffin(char ch) : val(ch), prev(0) {}
bool operator()(char ch) {
bool ret = prev == val && ch == val;
prev = ch;
return ret;
}
};
string convert_underscores_jerry_coffins_with_remove_copy_if(string str){
//CONS:
//Algorithm is the 2nd slowest algorithm.
//PROS:
//Concise, intuitive, needing only 4 lines.
//Offloads the heavy lifting to std builtin methods: std::remove_copy_if and std::back_inserter
string result = "";
std::remove_copy_if(str.begin(), str.end(),
std::back_inserter(result),
repeated_by_jerry_coffin('_'));
return result;
}
char* convert_underscores_by_RolandXu_using_char_stars_and_pointers(char *src){
//CONS:
//You have to get your hands dirty with C++ pointers.
//PROS:
//Fastest function of those tested.
char* readIndex;
char* writeIndex;
if (src==NULL) return NULL;
readIndex = writeIndex = src;
while (*readIndex != '\0')
{
while(*readIndex !='_' && *readIndex != '\0')
*writeIndex++ = *readIndex++;
if (*readIndex != '\0')
*writeIndex++ = *readIndex++;
while (*readIndex == '_')
readIndex++;
}
*writeIndex = '\0';
return src;
}
class both_equal_to__blastfurnace_version1{
char value;
public:
both_equal_to__blastfurnace_version1(char ch) : value(ch) {}
bool operator()(char first, char second) const{
return (first == second) && (first == value);
}
};
string convert_underscores_blastfurnace_version1_with_functor(string str){
//CONS:
//You have to create an entirely new class that overloads an operator.
//The speed is harmed by the usage of string.
//PROS:
//Don't need to roll your own loops with pointers.
str.erase(
std::unique(
str.begin(),
str.end(),
both_equal_to__blastfurnace_version1('_')
),
str.end()
);
return str;
}
class both_equal_to_blastfurnace_version2{
char value;
public:
both_equal_to_blastfurnace_version2(char ch) : value(ch) {}
bool operator()(char first, char second) const{
return (first == second) && (first == value);
}
};
char *convert_underscores_blastfurnace_version2_std_unique_and_no_string(char *str){
//CONS:
//No problems.
//PROS:
//More concise/intuitive than the fastest version and nearly as fast. Winner!
if (str){
size_t length = strlen(str);
char *end = std::unique(str, str + length, both_equal_to_blastfurnace_version2('_'));
*end = '\0';
}
return str;
}
void assertCharStarEquals(char* a, char* b, string msg){
if (strcmp(a, b) == 0) cout<<"passed" << endl;
else cout << "Failed" << msg << " should be: '" << a << "' it returned: '" << b << "'" << endl;
}
void assertStringEquals(string a, string b, string msg){
if (a == b) cout<<"passed" << endl;
else cout << "Failed" << msg << " should be: '" << a << "' it returned: '" << b << "'" << endl;
}
void test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(int numtests, string str){
char mystr[str.length()];
strcpy(mystr, str.c_str());
clock_t start = clock();
int x = 0;
while(x < numtests){
char* s = convert_underscores_by_RolandXu_using_char_stars_and_pointers(mystr);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout << diff << " RolandXu's version using char*. " << '\n';
}
void test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(int numtests, string str){
char mystr[str.length()];
strcpy(mystr, str.c_str());
clock_t start = clock();
int x = 0;
while(x < numtests){
char* val = convert_underscores_blastfurnace_version2_std_unique_and_no_string(mystr);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout << diff << " Blastfurnace's version part two, functor, without string. " <<endl;
}
void test03_convert_underscores_blastfurnace_version1_with_functor(int numtests, string str){
clock_t start = clock();
int x = 0;
while(x < numtests){
string s = convert_underscores_blastfurnace_version1_with_functor(str);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout << diff << " Blastfurnace's version part one with the functor and string. " <<endl;
}
void test04_convert_underscores_by_EricL_using_string_replace(int numtests, string str){
char mystr[str.length()];
strcpy(mystr, str.c_str());
clock_t start = clock();
int x = 0;
while(x < numtests){
string s = convert_underscores_by_EricL_using_string_replace(mystr);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout<< diff << " Eric L's version looping over the string doing find double underscore replace with single underscore. " <<endl;
}
void test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(int numtests, string str){
char mystr[str.length()];
strcpy(mystr, str.c_str());
clock_t start = clock();
int x = 0;
while(x < numtests){
string s = convert_underscores_EricL_loop_over_a_string_and_remove_repeats(mystr);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout << diff << " Eric L's version looping over each char and assembling a string. "<< endl;
}
void test06_convert_underscores_jerry_coffins_with_remove_copy_if(int numtests, string str){
clock_t start = clock();
int x = 0;
while(x < numtests){
string s = convert_underscores_jerry_coffins_with_remove_copy_if(str);
x++;
}
double diff = (std::clock() - start) / (double)CLOCKS_PER_SEC;
cout<< diff << " Jerry Coffin's version with remove_copy_if. " <<endl;
}
int main(){
cout << "GO!\n";
int numtests = 1000000;
string test_string = "__aa_a_aaa_--__&___aa_234______3)_!___<_";
cout << "Time | Algorithm Used.\n";
test01_convert_underscores_by_RolandXu_using_char_stars_and_pointers(numtests, test_string);
test02_convert_underscores_blastfurnace_version2_std_unique_and_no_string(numtests, test_string);
test03_convert_underscores_blastfurnace_version1_with_functor(numtests, test_string);
test04_convert_underscores_by_EricL_using_string_replace(numtests, test_string);
test05_convert_underscores_EricL_loop_over_a_string_and_remove_repeats(numtests, test_string);
test06_convert_underscores_jerry_coffins_with_remove_copy_if(numtests, test_string);
//Produces the following output:
//Extra assertion testing to make sure everyone's algorithm is correct:
char in[30];
char out[30];
strcpy(in, "a__");
strcpy(out, "a_");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "01");
strcpy(in, "a_");
strcpy(out, "a_");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "02");
strcpy(in, "_______");
strcpy(out, "_");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "03");
strcpy(in, "__a");
strcpy(out, "_a");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "04");
strcpy(in, "_hello___world__");
strcpy(out, "_hello_world_");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "05");
strcpy(in, "");
strcpy(out, "");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "06");
strcpy(in, " __ ");
strcpy(out, " _ ");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "07");
strcpy(in, "U+221E");
strcpy(out, "U+221E");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "08");
strcpy(in, "__\u00b2__");
strcpy(out, "_\u00b2_");
assertCharStarEquals(out, convert_underscores_by_RolandXu_using_char_stars_and_pointers(in), "09");
cout<< "OK\n";
strcpy(in, "a__");
strcpy(out, "a_");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "01");
strcpy(in, "a_");
strcpy(out, "a_");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "02");
strcpy(in, "_______");
strcpy(out, "_");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "03");
strcpy(in, "__a");
strcpy(out, "_a");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "04");
strcpy(in, "_hello___world__");
strcpy(out, "_hello_world_");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "05");
strcpy(in, "");
strcpy(out, "");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "06");
strcpy(in, " __ ");
strcpy(out, " _ ");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "07");
strcpy(in, "U+221E");
strcpy(out, "U+221E");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "08");
strcpy(in, "__\u00b2__");
strcpy(out, "_\u00b2_");
assertCharStarEquals(out, convert_underscores_blastfurnace_version2_std_unique_and_no_string(in), "09");
cout<< "OK\n";
string in_s = "a__";
string out_s = "a_";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "01");
in_s = "a_";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "02");
in_s = "_______";
out_s = "_";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "03");
in_s = "__a";
out_s = "_a";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "04");
in_s = "_hello___world__";
out_s = "_hello_world_";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "05");
in_s = "";
out_s = "";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "06");
in_s = " __ ";
out_s = " _ ";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "07");
in_s = "U+221E";
out_s = "U+221E";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "08");
in_s = "__\u00b2__";
out_s = "_\u00b2_";
assertStringEquals(out_s, convert_underscores_blastfurnace_version1_with_functor(in_s), "09");
cout<< "OK\n";
in_s = "a__";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "01");
in_s = "a_";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "02");
in_s = "_______";
out_s = "_";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "03");
in_s = "__a";
out_s = "_a";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "04");
in_s = "_hello___world__";
out_s = "_hello_world_";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "05");
in_s = "";
out_s = "";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "06");
in_s = " __ ";
out_s = " _ ";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "07");
in_s = "U+221E";
out_s = "U+221E";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "08");
in_s = "__\u00b2__";
out_s = "_\u00b2_";
assertStringEquals(out_s, convert_underscores_by_EricL_using_string_replace(in_s), "09");
cout<< "OK\n";
in_s = "a__";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "01");
in_s = "a_";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "02");
in_s = "_______";
out_s = "_";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "03");
in_s = "__a";
out_s = "_a";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "04");
in_s = "_hello___world__";
out_s = "_hello_world_";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "05");
in_s = "";
out_s = "";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "06");
in_s = " __ ";
out_s = " _ ";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "07");
in_s = "U+221E";
out_s = "U+221E";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "08");
in_s = "__\u00b2__";
out_s = "_\u00b2_";
assertStringEquals(out_s, convert_underscores_EricL_loop_over_a_string_and_remove_repeats(in_s), "09");
cout<< "OK\n";
in_s = "a__";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "01");
in_s = "a_";
out_s = "a_";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "02");
in_s = "_______";
out_s = "_";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "03");
in_s = "__a";
out_s = "_a";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "04");
in_s = "_hello___world__";
out_s = "_hello_world_";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "05");
in_s = "";
out_s = "";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "06");
in_s = " __ ";
out_s = " _ ";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "07");
in_s = "U+221E";
out_s = "U+221E";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "08");
in_s = "__\u00b2__";
out_s = "_\u00b2_";
assertStringEquals(out_s, convert_underscores_jerry_coffins_with_remove_copy_if(in_s), "09");
return 0;
}
私たちは何を学びましたか?
C ++文字列では、コンパイラが文字列のメモリを1つずつ忠実にステップ実行し、メモリ内の文字列の終わりを見つけてカウントするため、'str.length()'は非常に重いです。文字列を使用しないでください。また、str.length();を使用しないでください。
str.end()を使用すると、項目1と同様にO(n)パフォーマンスヒットも呼び出されます。str.end()は使用しないでください。
char配列でのstd::uniqueの使用は最適化されており、非常に高速です。文字列を連結したforループよりも1桁高速です。
C ++では、mystring [x]を実行すると、メモリ内のそのスロットのメモリルックアップが発生します。これには時間がかかり、ポインタを使用してポインタに1を追加するよりもはるかに低速です。xで反復するループ内にmystring[x]を配置しないでください。
文字列を使用する必要がある場合は、mystring + =anotherstring[x]を入れないでください。文字列は、この行を実行するたびに、文字列全体を1つずつステップ実行する必要があります。
文字列を連結したり、メモリのチャンクを取得したり、ポインタを定義したり、ポインタに文字を配置したり、ポインタをインクリメントしたりしないでください。ループと文字列との連結は、O(n)の複雑さを引き起こします。
この日はたくさんの学びがあり、素晴らしい歌が歌われます。
ライブラリなし:
#include <stdio.h>
char *RemoveUnderScore(char *src)
{
char* readIndex;
char* writeIndex;
if (src==NULL) return NULL;
readIndex = writeIndex = src;
while (*readIndex != '\0')
{
while(*readIndex !='_' && *readIndex != '\0')
*writeIndex++ = *readIndex++;
if (*readIndex != '\0')
*writeIndex++ = *readIndex++;
while (*readIndex == '_')
readIndex++;
}
*writeIndex = '\0';
return src;
}
int main(int argc,char** argv){
char str[] = "_hello___worl__d___";
printf(RemoveUnderScore(str));
return 0;
}
私は次のようなものを使用すると思いますstd::remove_copy_if
:
char prev;
std::remove_copy_if(input.begin(), input.end(),
std::back_inserter(result),
[&prev] (char ch) ->bool {
bool ret=prev=='_' && ch == '_';
prev=ch;
return ret;
});
または、C ++ 03で立ち往生している場合は、ラムダの代わりに明示的なファンクターを使用して同じことを行うことができます。
class repeated {
char prev;
char val;
public:
repeated(char ch) : val(ch), prev(0) {}
bool operator()(char ch) {
bool ret = prev == val && ch == val;
prev = ch;
return ret;
}
};
std::remove_copy_if(input.begin(), input.end(),
std::back_inserter(result),
repeated('_'));
いずれにせよ、各文字を入力から出力に(最大で)1回だけコピーします。ここで、を使用std::string::replace
すると、左側に繰り返されるアンダースコアの数と同じ頻度で各文字がコピーされます。
編集:@blastfurnaceの答えを見ても、実際にこれを使用するかどうかはわかりません-std::unique
おそらくこの仕事に適しています。
In C++, what I gave, very inefficiently:
string convert_underscores(string str){
int len = str.length();
//bad, str.length() incurs O(n) complexity.
string result = "";
//bad, don't use string. string makes everything slow.
if (len < 2) return str;
result += str[0];
//bad bad bad, string concatenation incurs O(n) complexity.
for(int x = 1; x < len; x++){
//This for loop incurs unnecessary management overhead.
if (str[x] != str[x-1] || str[x] != '_'){
result += str[x];
//concatenation extremely evil here: costs O(n)
//A lookup on str[x] costs time.
}
}
return result;
//returning a string bad, the entire memory is moved
//instead of just a pointer to the first slot.
}
or even less efficient with str.replace.
string convert_underscores(string str){
while(str.find("__") != string::npos){
str = str.replace(str.find("__"), 2, "_");
}
return str;
}
//find incurs unnecessary O(n) complexity. string copy incurs
//a full memory movement on contents of string.
//n passes are required over the string.