c - 文字列を文字列の配列に分割するにはどうすればよいですか？

Question

例えば：

input（string）：foo $$ foo ## foo []

検索文字列（string）：foo

output（array）：$$ ,## ,[]

私はそれを試してみました：

/*
 * Attempt to split str on every occurrence of s, keeping one pointer per
 * piece in buffer and the piece characters in tmpbuf.
 */
char * str = "foo $$ foo ## foo []";
    char * s = "foo";

    int buf_len = 0;    /* characters appended to tmpbuf since the last full match */
    int len = strlen(s);
    int i = 0;          /* index into s while comparing; reset only after a full match */

    /* MAX_BUFFER_SIZE is not defined in this snippet; note that it is used
     * here as a byte count, not as a number of pointers. */
    char ** buffer = malloc(MAX_BUFFER_SIZE);
    char * tmpbuf = malloc(MAX_BUFFER_SIZE);
    char * p = str;         /* read position in str */
    char ** buf = buffer;   /* next free slot in buffer */
    char * tbuf = tmpbuf;   /* write position in tmpbuf */

    while(*p)
    {
        if(*p == *s) 
        {
            /* Walk through str and s together for as long as they agree. */
            while(*p == *(s + i)) 
            { 
                i++;
                p++;
            }

            if(i == len) 
            {
                /*
                 * All of s matched.  Only the current write position inside
                 * tmpbuf is stored; no characters are copied, so every
                 * stored pointer refers into the same shared buffer.
                 */
                *buf ++ = tbuf;
                /* Zeroes buf_len bytes starting at the write position. */
                memset(tbuf,0,buf_len);
                i = buf_len = 0;
            }
        }
        else 
        {
            /* Character that does not start a match: append it to tmpbuf. */
            *tbuf ++= *p;
            buf_len ++;
        }

        /* Executed after both branches, so one more character is skipped
         * after the inner loop has already advanced p. */
        p++;
    }

    /* NULL marks the end of the pointer list for the print loop below. */
    *buf ++= NULL;

    int x;
    for(x = 0; buffer[x]; x++)
    {
        printf("%s\n", buffer[x]);
    }

    free(buffer);
    free(tmpbuf);

次の出力が表示されます。

$$ ## []
## []
[]

しかし、期待されるのは：

$$  
##  
[]

これを修正する方法は？

score 5 · Accepted Answer

文字列を文字列の配列に分割する関数は次のとおりです。

#include <assert.h>
#include <string.h>

/*
 * Break @str into items separated by @delim.
 *
 * A pointer to the start of every item is appended to @pointers_out, so it
 * ends up holding an array of C strings; the characters of the items (each
 * one NUL-terminated) are stored back to back in @bytes_out.  The caller
 * must supply buffers that are large enough for all of the items.
 *
 * @delim must not be the empty string.
 *
 * Returns how many items were stored in @pointers_out.
 */
size_t explode(const char *delim, const char *str,
               char **pointers_out, char *bytes_out)
{
    const size_t  sep_len = strlen(delim);
    size_t        count   = 0;
    const char   *hit;

    assert(sep_len > 0);

    /* One iteration per delimiter found: emit the text in front of it. */
    while ((hit = strstr(str, delim)) != NULL) {
        size_t item_len = (size_t)(hit - str);

        /* The item starts wherever the output cursor currently is. */
        pointers_out[count++] = bytes_out;

        memcpy(bytes_out, str, item_len);
        bytes_out[item_len] = '\0';
        bytes_out += item_len + 1;

        /* Resume scanning right after the delimiter. */
        str = hit + sep_len;
    }

    /* Whatever remains after the last delimiter is the final item. */
    pointers_out[count++] = bytes_out;
    strcpy(bytes_out, str);

    return count;
}

使用法：

#include <stdio.h>
/* ... */

#define BIG_ENOUGH 1000

/*
 * Demonstration: split a sample string on "foo" and print every resulting
 * item wrapped in double quotes, one item per line.
 */
int main(void)
{
    char   *items[BIG_ENOUGH];       /* start of each item */
    char    item_bytes[BIG_ENOUGH];  /* storage for the item characters */
    size_t  count = explode("foo", "foo $$ foo ## foo []", items, item_bytes);
    size_t  i     = 0;

    while (i < count) {
        printf("\"%s\"\n", items[i]);
        i++;
    }

    return 0;
}

出力：

""
" $$ "
" ## "
" []"

これは、あなたが要求したとおりの出力にはなりません。周囲のスペースや、文字列の先頭に現れる項目区切り文字（この例では "foo"）をどう扱いたいのかが分からなかったためです。代わりに、PHP の explode 関数を模倣しました。

私の explode 関数がメモリ管理を呼び出し元に丸投げしている点を指摘しておきたいと思います。バッファーが十分な大きさであることを確認するのは、呼び出し元の責任です。これは簡単なスクリプトでは問題ありませんが、より本格的なプログラムでは、この関数を正しく使うために必要なサイズを計算しなければならず、煩わしい場合があります。独自に割り当てを行う、より「堅牢な」実装を書くこともできましたが、そうすると次の問題があります。

実装が煩雑になります。
呼び出し元が独自のメモリアロケータを使うという選択肢がなくなります。

したがって、私のやり方での explode の実装は「悪い」と言えます。正しく使うのが難しく、さらに悪いことに、間違って使いやすいからです。一方で、機能とメモリ管理という関心事を分離しているという点では「良い」とも言えます。

score 3 · Accepted Answer

これは、次のように書いたときに、tbuf の内容を buf にコピーしていないためです。

*buf ++ = tbuf;

ここで行っているのは、tbuf（あるいは tmpbuf と言ってもかまいません）の現在位置への参照を保存することだけです。

tmpbuf は、区切り文字以外のすべての文字で埋められます。

ループの最後では、次のようになります。

          01234567 <- offset
tmpbuf = "$$ ## []"

buf[0] = tmpbuf+0;
buf[1] = tmpbuf+3;
buf[2] = tmpbuf+6;

または非常に単純化されたメモリテーブル:

        memory
       address        value   
tmpbuf -> 0x01       [   $] <- buffer[0] points here
          0x02       [   $]
          0x03       [    ]
          0x04       [   #] <- buffer[1] points here
          0x05       [   #]
          0x06       [    ]
          0x07       [   [] <- buffer[2] points here
          0x08       [   ]]
          0x09       [    ]
          ...
buffer -> 0x3A       [0x01]
          0x3B       [0x04]
          0x3C       [0x07]
          0x3D       [    ]
          0x3E       [    ]
          ...

編集

お遊びとして。ポインターを使い、メモリを動的に確保し、strstr() を使わない方法です。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Test whether the string at hay begins with needle.
 *
 * Every character of needle, including the first one, is compared, so the
 * result does not depend on the caller having checked the first character
 * beforehand.  An empty needle is a prefix of every string.
 *
 * Returns 1 if hay starts with needle, 0 otherwise.
 */
int is_needle(const char *hay, const char *needle)
{
    /*
     * Stop at the end of needle or at the first mismatch.  The NUL that ends
     * hay mismatches any remaining needle character, so neither string is
     * read past its terminator.
     */
    while (*needle != '\0' && *hay == *needle) {
        ++hay;
        ++needle;
    }
    return *needle == '\0';
}

/*
 * Locate the first position in hay where needle occurs.
 *
 * Returns a pointer to that position, or a pointer to hay's terminating
 * NUL when there is no occurrence, so callers can test the result with
 * *result.
 */
char *find(char *hay, char *needle)
{
    char *cursor;

    for (cursor = hay; *cursor != '\0'; cursor++) {
        /* Cheap first-character filter before the full comparison. */
        if (*cursor != *needle)
            continue;
        if (is_needle(cursor, needle))
            return cursor;
    }
    return cursor;
}

/*
 * Append a NUL-terminated copy of the first slen bytes of val to the
 * NULL-terminated string vector *vs, which currently holds *vslen strings.
 *
 * The slot at index *vslen is handed to realloc(), so it has to contain the
 * vector's NULL terminator (or a pointer that may be reallocated).  The
 * vector is then grown by one slot and terminated with NULL again; *vs and
 * *vslen are updated in place.
 *
 * On allocation failure a message is printed with perror() and the program
 * exits with status 1.
 *
 * Returns the new number of strings in the vector.
 */
int pushstr(char ***vs, size_t *vslen, char *val, size_t slen)
{
    size_t   count = *vslen;
    char    *copy;
    char   **grown;

    /* Storage for the new string plus its terminator. */
    copy = realloc((*vs)[count], slen + 1);
    (*vs)[count] = copy;
    if (copy == NULL) {
        perror("pushstr.1"); exit(1);
    }

    memcpy(copy, val, slen);
    copy[slen] = '\0';

    /* Make room for one more slot so the vector stays NULL-terminated. */
    count += 1;
    *vslen = count;
    grown = realloc(*vs, sizeof(char*) * (count + 1));
    *vs = grown;
    if (grown == NULL) {
        perror("pushstr.2"); exit(1);
    }
    grown[count] = NULL;

    return count;
}

/*
 * Split a sample string on every occurrence of "foo" using find() and
 * pushstr(), print each non-empty piece, then release all memory.
 */
int main(void)
{
    char    *needle = "foo";
    char    *hay    = "foo $$ foo ## foo [] fox @@ foo ??";
    size_t   nlen   = strlen(needle);
    size_t   vslen  = 0;
    size_t   k;
    char    *np;
    char   **vs     = malloc(sizeof(char*));

    if (vs == NULL) {
        perror("main");
        return 1;
    }
    vs[0] = NULL;   /* an empty vector is just its terminator */

    for (;;) {
        np = find(hay, needle);
        if (*np == '\0')
            break;              /* no further occurrence */

        /* Keep the text in front of the match unless it is empty. */
        if (np != hay)
            pushstr(&vs, &vslen, hay, np - hay);

        /* In either case continue right after the match. */
        hay = np + nlen;
    }

    /* Text after the last match, if any. */
    if (np != hay)
        pushstr(&vs, &vslen, hay, np - hay);

    for (k = 0; vs[k] != NULL; k++)
        printf("V: '%s'\n", vs[k]);

    for (k = 0; vs[k] != NULL; k++)
        free(vs[k]);
    free(vs);

    return 0;
}

score 1 · Accepted Answer

これは strstr() の出番です。それを利用するように、あなたのコードを少し変更しました。

/*
 * Store a NUL-terminated copy of the first len bytes of str in the slot
 * that *buf points to, then advance *buf to the next slot.
 *
 * Empty pieces (len == 0) are skipped.  The copy is heap-allocated; the
 * caller owns it and has to free() it.
 *
 * Returns 1 if a string was stored, 0 if nothing was stored (empty piece or
 * allocation failure); in the latter case *buf is left unchanged.
 */
int add_to_buf(char *str, size_t len, char ***buf)
{
  char *copy;

  if (len == 0) return 0;

  /* One extra byte for the terminator that "%s" and the str*() functions
     rely on. */
  copy = malloc (len + 1);
  if (copy == NULL) return 0;

  memcpy (copy, str, len);
  copy[len] = '\0';

  **buf = copy;
  ++*buf;
  return 1;
}

/*
 * Split a sample string on "foo" with strstr(), print every non-empty
 * piece on its own line, then release all memory.
 */
int main(void)
{
  char *str = "foo $$ foo ## foo []";
  char *s = "foo";

  char **buffer = malloc (MAX_BUFFER_SIZE*sizeof(*buffer)), **buf = buffer;
  char *start, *end;

  size_t s_len = strlen (s);

  if (buffer == NULL) return 1;

  start = str;
  end = strstr (str, s);
  while (end) {
    /* Text between the previous delimiter and this one. */
    add_to_buf (start, end-start, &buf);
    start = end + s_len;
    end = strstr (start, s);
  }
  /* Whatever follows the last delimiter. */
  add_to_buf (start, strlen (start), &buf);
  *buf = NULL;

  for (buf = buffer; *buf; ++buf)
      printf ("%s\n", *buf);

  /* Every piece was allocated separately by add_to_buf(), so release the
     pieces before the pointer array itself. */
  for (buf = buffer; *buf; ++buf)
      free (*buf);
  free (buffer);
  return 0;
}

score 1 · Accepted Answer

単純なプログラムにしてはポインターを使いすぎており、その使い方のせいでコードが理解しにくくなっています。すぐに目につく単純なバグの 1 つは、char **buffer（文字列の配列）を使っているのに、文字列 1 つ分のメモリしか割り当てていないことです。トークンを格納するためにこの文字列の配列を使っているので、どこかでメモリ違反が発生します。

トークンを出力したいので、それらを別の配列に保存する必要はありません。これは次のようになります。

#include<stdio.h>
#include<string.h>

/*
 * Split the sample string on spaces and print every token that differs
 * from the search word.
 */
int main(int ac, char*argv[]) {
    char str[] = "foo $$ foo ## foo []";
    char *s = "foo";
    char *p;

    /* strtok() returns one space-separated token per call until NULL. */
    for (p = strtok(str, " "); p != NULL; p = strtok(NULL, " ")) {
        if (strcmp(p, s) != 0) {    /* print non matching tokens */
            printf("%s\n", p);
        }
    }

    return 0;
}

ここでは区切り文字が空白であることに注意してください。

score 0 · Accepted Answer

strtok関数は、このタスクのために設計されました。

#include <string.h>
...
char *token;
/*
 * strtok() overwrites the delimiters in the string it scans with '\0', so
 * the input has to be a modifiable array; writing to a string literal
 * through a char * is undefined behavior.
 */
char line[] = "LINE TO BE SEPARATED";
char *search = " ";


/* First call passes the string: token will point to "LINE". */
token = strtok(line, search);


/* Passing NULL continues with the same string: token will point to "TO". */
token = strtok(NULL, search);

c - 文字列を文字列の配列に分割するにはどうすればよいですか？

5 に答える 5

Related

Reference