Boost のトークナイザーで、引用符で囲まれた部分は分割せずに、以下の文字列を分割する方法はありますか?
string s = "1st 2nd \"3rd with some comment\" 4th";
Expected output:
1st
2nd
3rd with some comment
4th
You can use an escaped_list_separator
from the tokenizer library. See this question for more details on how to apply it to your problem.
C++11 solution
#include <iostream>
#include <string>
#include <vector>
// Splits `str` into tokens separated by single spaces, keeping text enclosed
// in double quotes together (spaces inside quotes are part of the token).
//
// Rules, as implemented:
//   - A double quote toggles "quoted" mode and is never copied into a token.
//   - A closing quote always emits the text accumulated so far, even when it
//     is empty (so `""` produces one empty token).
//   - A space outside quotes emits the accumulated text only if non-empty.
//   - Whatever is left at the end of the input is emitted if non-empty; an
//     unterminated quote therefore still yields its contents.
//
// Returns the tokens in the order they were found.
std::vector<std::string> tokenize(const std::string& str) {
    std::vector<std::string> result;
    std::string current;
    bool quoted = false;

    for (const char ch : str) {
        if (ch == '"') {
            // Leaving a quoted section: flush unconditionally.
            if (quoted) {
                result.push_back(current);
                current.clear();
            }
            quoted = !quoted;
            continue;
        }

        // Ordinary characters, and spaces inside quotes, extend the token.
        if (ch != ' ' || quoted) {
            current.push_back(ch);
            continue;
        }

        // An unquoted space ends the pending token, if there is one.
        if (!current.empty()) {
            result.push_back(current);
            current.clear();
        }
    }

    // Emit a trailing token that was not followed by a delimiter.
    if (!current.empty()) {
        result.push_back(current);
    }
    return result;
}
// Demo driver: tokenizes the sample string and prints one token per line.
int main() {
    const std::string s = "1st 2nd \"3rd with some comment\" 4th";
    const std::vector<std::string> tokens = tokenize(s);
    for (const std::string& token : tokens) {
        std::cout << token << "\n";
    }
}
次のコードを試してみてください。Boost.Tokenizer や Boost.Spirit ライブラリを使わずに済みます。
#include <vector>
#include <string>
#include <iostream>
// Characters that delimit tokens: a space and character code 9 (horizontal tab).
const char Separators[] = { ' ', 9 };

// Returns true when Ch equals one of the entries of Separators, false otherwise.
bool Str_IsSeparator( const char Ch )
{
    for ( const char Candidate : Separators )
    {
        if ( Candidate == Ch )
        {
            return true;
        }
    }

    return false;
}
// Splits Str into tokens delimited by separator characters (see
// Str_IsSeparator) and stores a range of them into Components. Text enclosed
// in double quotes is kept as a single token; the quotes themselves are not
// stored. An unterminated quote runs to the end of the string.
//
// Parameters:
//   FromToken  - 1-based number of the first token to store.
//   ToToken    - 1-based number of the last token to store; parsing stops
//                as soon as this token has been stored.
//   Str        - the line to split.
//   Components - output. Token number T is written to
//                Components[ T - FromToken + 1 ], i.e. the first stored token
//                lands in slot 1 and slot 0 is left untouched. The vector is
//                grown when it is too small; it is never shrunk and slots that
//                receive no token keep their previous contents.
//
// Empty tokens (an empty pair of quotes) are skipped and not counted.
void SplitLine( size_t FromToken, size_t ToToken, const std::string& Str, std::vector<std::string>& Components /*, bool ShouldTrimSpaces*/ )
{
    size_t TokenNum = 0;
    const size_t Offset = FromToken - 1;
    const char* CStr = Str.c_str();

    while ( *CStr )
    {
        // bypass spaces & delimiting chars
        while ( *CStr && Str_IsSeparator( *CStr ) ) { CStr++; }

        if ( !*CStr ) { return; }

        const bool InsideQuotes = ( *CStr == '\"' );

        // step over the opening quote so that it is not part of the token
        if ( InsideQuotes ) { CStr++; }

        // find the end of the token: the closing quote for quoted text,
        // otherwise the next separator (or the end of the string in both cases)
        const char* TokenEnd = CStr;

        if ( InsideQuotes )
        {
            while ( *TokenEnd && *TokenEnd != '\"' ) { TokenEnd++; }
        }
        else
        {
            while ( *TokenEnd && !Str_IsSeparator( *TokenEnd ) ) { TokenEnd++; }
        }

        // extract token
        if ( CStr != TokenEnd )
        {
            TokenNum++;

            // store each token found
            if ( TokenNum >= FromToken )
            {
                const size_t Slot = TokenNum - Offset;

                // grow the output instead of writing past its end
                if ( Slot >= Components.size() ) { Components.resize( Slot + 1 ); }

                Components[ Slot ].assign( CStr, TokenEnd );

                // if ( ShouldTrimSpaces ) { Str_TrimSpaces( &Components[ Slot ] ); }

                // proceed to next token
                if ( TokenNum >= ToToken ) { return; }
            }
        }

        // Always move past the token and its terminator (closing quote or
        // separator), even when the token was empty. Otherwise the closing
        // quote of "" would be re-read as an opening quote on the next pass.
        CStr = TokenEnd;

        // exclude last " from token, handle EOL
        if ( *CStr ) { CStr++; }
    }
}
// Demo driver: splits the sample line into its first four tokens and prints
// every slot of the output vector, one per line. The vector is pre-sized to
// five entries; all of them are printed, including any that stay empty.
int main()
{
    const std::string test = "1st 2nd \"3rd with some comment\" 4th";

    std::vector<std::string> Out( 5 );
    SplitLine( 1, 4, test, Out );

    for ( const std::string& Entry : Out )
    {
        std::cout << Entry << std::endl;
    }

    return 0;
}
事前に割り当てられた文字列配列を使用し (ゼロベースではありませんが、簡単に修正できます)、非常に単純です。