c - セグ。ハッシュテーブル ADT の障害 - C

Question

編集：

Hash.c はコメントからのリビジョンで更新されますが、まだ Seg 障害が発生しています。私はあなたたちが言っていることをここで見逃しているに違いない

C を使用してハッシュテーブル ADT を作成しましたが、ADT で関数 (find_hash) を呼び出そうとすると、セグメンテーションエラーが発生します。

私が作成した parse.c、hash.c、および hash.h の 3 つのファイルすべてを投稿したので、すべての変数を確認できます。同じく添付されているファイル gettysburg.txt から読み取っています。

find_hash を呼び出すと、parse.c でセグフォールトが発生します。ここで何が起こっているのか、一生わからない。さらに情報が必要な場合は、必ず提供できます。

長いコードで申し訳ありませんが、これで1週間完全に困惑しました。前もって感謝します

私がプログラムを実行する方法は最初です: gcc -o parse parse.c hash.c 次に: cat gettysburg.txt | 解析する

Parse.c

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)malloc(sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

  for (i = 0; i < t->total; i++)
     cur2 = next_hash_walk(t);
     printf("%s:  %d\n", cur2->word, cur2->freq_counter);
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

hash.c

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)malloc(sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size);  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;
    t->sorted_array = NULL;
    t->index=0;
    t->sort_num=0;

    for(i=0;i<size;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;
    hash_entry *cur;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                free(cur->key);  //Freeing memory for key and data
                free(cur->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }}
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)malloc(sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
    hash_entry *cur;
   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){            
            for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
                if(strcmp(table->buckets[i]->key, key) == 0)
                return((table->buckets[i]->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];  //creates an array, same size as table->size(# of buckets)
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
                count ++;}
            node_num[i] = count;
            count = 0;}
        }

    for(j = 0; j < table->size; j ++){      
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) / (table->size);
}

void *start_hash_walk(Phash_table table){
    Phash_table temp = table;
    int i, j, k;
    hash_entry *cur;  //CHANGE IF NEEDED to HASH_TABLE *

    if(table->sorted_array != NULL) free(table->sorted_array);

    table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));

    for(i = 0; i < table->total; i++){
        if(table->buckets[i]!=NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                table->sorted_array[i] = table->buckets[i]->data;
        }}
    }

    for(j = (table->total) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                temp -> buckets[0]-> data = table->sorted_array[k-1];
                table->sorted_array[k-1] = table->sorted_array[k];
                table->sorted_array[k] = temp->buckets[0] -> data;
            }
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    table->sort_num ++;
    return table->sorted_array[table->sort_num];
}

ハッシュ.h

#include <stdio.h> 
#include <stdlib.h>
#include <string.h>

typedef struct hash_entry_ {    //Linked List
    void *data;                 //Generic pointer
    char *key;                  //String-based key value
    struct hash_entry_ *next;   //Self-Referencing pointer
} hash_entry, *Phash_entry;

typedef struct hash_table_ {
    hash_entry **buckets;           //Pointer to a pointer to a Linked List of type hash_entry
    int (*hash_func)(char *);
    int (*cmp_func)(void *, void *);
    int size;
    void **sorted_array;         //Array used to sort each hash entry
    int index;
    int total;
    int largest;
    float average;  
    int sort_num;
} hash_table, *Phash_table;


Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

ゲティスバーグ.txt

Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.

Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.

We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.

But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.

It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.

score 2 · Accepted Answer

このコードに関するいくつかの問題の1つは、次のようなループである可能性があります。

for(table->buckets[i]; 
    table->buckets[i]->next != NULL; 
    table->buckets[i] = table->buckets[i]->next)
  ...

forループの初期化部分（table->buckets[i]）は効果がありません。iが0で、の場合table->buckets[0] == NULL、このループ（table->buckets[i]->next != NULL）の条件はnullポインターを逆参照し、クラッシュします。

少なくとも、私のボックスではコードがクラッシュしているように見えました。いくつかのループを次のように変更したとき：

if (table->buckets[i] != NULL) {
  for(; 
      table->buckets[i]->next != NULL; 
      table->buckets[i] = table->buckets[i]->next)
    ...
}

...クラッシュし続けましたが、別の場所にありました。多分それはあなたが動けなくなるのを助けるでしょう？

編集：別の潜在的な問題は、forループが破壊的であるということです。を呼び出すときにfind_hash、これらのバケットをすべて変更する必要がありますか？

次のようなものを使用することをお勧めします：

hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
  for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
    // ...
  }
}

私がそれをしてあなたの関数をコメントアウトするとdump_dictionary、あなたのコードはクラッシュすることなく実行されます。

score 0 · Accepted Answer

うーん、

ここにhash.cがあります

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)calloc(1, sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *));  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;

    for(i=0;t->buckets[i] != NULL;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i]!=NULL)
            for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                free(table->buckets[i]->key);  //Freeing memory for key and data
                free(table->buckets[i]->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
hash_entry *cur;

   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                if(strcmp(cur->key, key) == 0)
                   return((cur->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];    
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++)
    {
        if(table->buckets[i]!=NULL)            
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                count ++;}

        node_num[i] = count;
        count = 0;
    }

    for(j = 0; j < table->size; j ++){
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}

void *start_hash_walk(Phash_table table){
    void* temp = 0; //oook: this was another way of overwriting your input table 
    int i, j, k; 
    int l=0; //oook: new counter for elements in your sorted_array
    hash_entry *cur;

    if(table->sorted_array !=NULL) free(table->sorted_array);

    table->sorted_array = (void**)calloc((table->total), sizeof(void*));

    for(i = 0; i < table->size; i ++){
    //for(i = 0; i < table->total; i++){  //oook: i don't think you meant total ;)
        if(table->buckets[i]!=NULL)
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                table->sorted_array[l++] = cur->data;
            }
    }

    //oook: sanity check/assert on expected values
    if (l != table->total)
    {
        printf("oook: l[%d] != table->total[%d]\n",l,table->total);
    }

    for(j = (l) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if (table->sorted_array[k-1] && table->sorted_array[k])
            {
                if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                    temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
                    table->sorted_array[k-1] = table->sorted_array[k];
                    table->sorted_array[k] = temp;
                }
            }
            else
                printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    /*oook: this was blowing up since you were incrementing past the size of sorted_array..
    NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
    //table->sort_num++
    return table->sorted_array[table->sort_num++];
}

ここにparse.cがあります

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>  //oook: added so you can assert ;)
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)calloc(1,sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
        dictionary_size++;
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  if (!cur) //ook: do test or assert for null values
  {
    printf("oook: null== (cur = start_hash_walk)\n");
    exit(-1);
  }
  printf("%s:  %d\n", cur->word, cur->freq_counter);
  for (i = 0; i < t->total; i++)
  {//oook: i think you needed these braces
      cur2 = next_hash_walk(t);
      if (!cur2) //ook: do test or assert for null values
      {
        printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
      }
      else
        printf("%s:  %d\n", cur2->word, cur2->freq_counter);
  }//oook: i think you needed these braces
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    /* ooook: do assert on programmatic errors. 
    this function *requires non-null inputs.  */
    assert(cur1 && cur2);  
    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

に従ってください//ooks

説明:

これが爆発する場所が 1 つか 2 つありました。
あなたの質問に対する簡単な修正と回答は、 L100parse.c年頃のにありました。

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

..呼び出し前でcurはないことを確認すると、即時のセグフォルトが修正されます。nullprintf

しかし、なぜでしょcurうnullか？~この悪い子のせいで:
void *start_hash_walk(Phash_table table)

一意hash_func(char *string)でない値を返すことができます (& します)。リンクされたリストチェーンをまだ実装していない場合を除いて、これはもちろん問題ありません。したがって、table->sorted_array以下の要素を含むことになります ~ または、すべてのバケット table->totalを反復処理している場合はそうなります;)table->size

他に 1 つまたは 2 つの問題があります。今のところ、ネイト・コール for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next)をさらにハッキングしfor(cur=table->buckets[i]; cur != NULL; cur=cur->next)ました。チェーンがないためです。しかし、これは*あなたのTODOなので、それについては十分に述べています。

ついに。あなたが持っていることに注意してnext_hash_walk(Phash_table table)ください：

table->sort_num++
return table->sorted_array[table->sort_num];

痛い！これらの配列の境界を確認してください。

注

1) 関数が入力を変更するように設計されていない場合は、入力を行いますconst。そうすれば、不注意で何かを破棄しているときに、コンパイラが通知してくれる可能性があります。

2) 配列インデックスの境界チェックを行います。

3) Null ポインターを使用する前にテスト/アサートを行います。

4) 各関数の単体テストを行います。コンパイルとテストの前にコードを書きすぎないようにしてください。

5) 最小限のテストデータを使用します。コードを制限テストし、狡猾な方法でそれを破ろうとするように作成します。

6) データ構造を初期化してください!

7)エジプト式ブレースは絶対に使用しないでください。{
冗談です ;)
}

PS ここまでお疲れ様でした ~> ポインターはトリッキーな小さなものです! & 必要なすべての詳細を含むよくある質問なので、+1 と gl ;)

(//oook: 多分宿題タグを追加してください)

c - セグ。ハッシュ テーブル ADT の障害 - C

編集：

Parse.c

hash.c

ハッシュ.h

ゲティスバーグ.txt

2 に答える 2

Related

Reference

c - セグ。ハッシュテーブル ADT の障害 - C