这段代码是用的线性探测,怎么改成链地址法

这段代码是用的线性探测,怎么改成链地址法

#include 
#include 
#include 
#include 
#include 

#define MAX_PERSONS 100000    // hash table size - experiment with different values
#define MAX_NAME_LENGTH 50
#define COUNTRY_CODE_LENGTH 4
#define AREA_CODE_LENGTH 4
#define LOCAL_NUMBER_LENGTH 8

// struct to store a persosn name and phone nummber details
typedef struct person {
    char *first_name;
    char *last_name;
    char *fullName;
    char *countryCode;
    char *areaCode;
    char *localNumber;
    
} person_struct;

// global variables for printing status infomation
int collision = 0;
int longest_probing_sequence = 0;

// The bad hash function! TODO: Implement a better hesh function!
int count_hash(int mod, char* fullName, int fullNameLength) {
    int hash = 0;
    for(int i=0; i < fullNameLength; i++){
        hash = (hash*71 + fullName[i])%mod;
    }
    return hash;
}
int insertPerson(    person_struct *name_store,
            char *fi_name,
            char *la_name,
            char *c_code,
            char *a_code,
            char *l_number,
            int *allPersons) {
    int i;
    int hash,firsthash;
    
    // Allocating memory for person information
    int fiNameLength = strlen(fi_name);
    char *tempFiName = malloc(fiNameLength*sizeof(char)+1);
    int laNameLength = strlen(la_name);
    char *tempLaName = malloc(laNameLength*sizeof(char)+1);
    int coCodeLength = strlen(c_code);
    char *tempCoCode = malloc(coCodeLength*sizeof(char)+1);
    int arCodeLength = strlen(a_code);
    char *tempArCode = malloc(arCodeLength*sizeof(char)+1);
    int loNumberLength = strlen(l_number);
    char *tempLoNumber = malloc(loNumberLength*sizeof(char)+1);
    int fullNameLength = MAX_NAME_LENGTH*2;
    char *tempFullName = malloc(fullNameLength*sizeof(char)+1);

    strcpy(tempFiName, fi_name);
    strcpy(tempLaName, la_name);
    strcpy(tempCoCode, c_code);
    strcpy(tempArCode, a_code);
    strcpy(tempLoNumber, l_number);
    
    //concatenating first name and last name into a variable for the key
    strcpy(tempFullName, tempFiName);
    strcat(tempFullName, tempLaName);
    
    //changeing the key to capital letters
    for (i=0; i < fullNameLength; i++) {
        tempFullName[i] = toupper(tempFullName[i]);
    }
    tempFullName[i] = 0;
    /*printf("%s %s %s %s\n",tempFullName,tempArCode,tempArCode,tempLoNumber);*/
    // counter for number of persons stored in the hash table
    *allPersons = *allPersons+1;

    // compute hash for the key
    hash = count_hash(MAX_PERSONS, tempFullName, fullNameLength);
    //printf("%d",hash);
    // insert person into hash table
    firsthash = hash;
    int probing = 0;
    int stored = 0;
    while (stored == 0) {
        if (name_store[hash].fullName != 0) {
            // Linear probing used a probing method
            // TODO: Implement better probing method
            hash = (hash+1)%MAX_PERSONS;
            collision++;
            probing++;
            
            //Check if hashtable full
            if (hash == firsthash) {
                return 0;
            }
        }
        else {
            person_struct person;
            person.first_name = tempFiName;
            person.last_name = tempLaName;
            person.fullName = tempFullName;
            person.countryCode = tempCoCode;
            person.areaCode = tempArCode;
            person.localNumber = tempLoNumber;
            name_store[hash] = person;
            stored = 1;
        }
    }
    if (probing > longest_probing_sequence) {
        longest_probing_sequence = probing;
    }
    return 1;
}


// main function
int main() {
    FILE *personFile;
    //person_struct persons[MAX_PERSONS+1];
    // allocate memory for hash table and initialize needed variables
    person_struct *persons = (person_struct *)malloc(sizeof(person_struct)*(MAX_PERSONS+1));
    char firstName[MAX_NAME_LENGTH];
    char lastName[MAX_NAME_LENGTH];
    char countryCode[COUNTRY_CODE_LENGTH];
    char areaCode[AREA_CODE_LENGTH];
    char localNumber[LOCAL_NUMBER_LENGTH];
    int personCount = 0;
    double totaltime;
    clock_t start,end;
    int i;
    int full = 0;

    // get input file name from user
    char fileName[500];
    printf("Type filename:  > ");
    scanf("%s",fileName);

    personFile = fopen(fileName,"r");

    if (personFile == NULL){
        printf("NO SUCH FILE!\n");
        return 0;
    }

    char garbage[1000];

    // measure time for inserting all persons into the hash table
    start = clock();
    
    // reade lines while not end of file
    while (fscanf(personFile, "%[^,], %[^,], %[^,], %[^,], %[^\n]", firstName, lastName, countryCode, areaCode, localNumber) != EOF) {
        /*printf("%s ", firstName);
        printf("%s ", lastName);
        printf("%s ",countryCode);
        printf("%s ", areaCode);
        printf("%s\n", localNumber);*/
        // try to insert person into hash table
        if (insertPerson(persons, firstName, lastName, countryCode, areaCode, localNumber, &personCount) == 0){
            printf("Hash table full. File not processed completely!\n");
            full = 1;
            break;
        }

        if (fscanf(personFile, "%999[^a-zA-Z']", garbage) == EOF) {
            // End of file garbage
            break;
        }
    }

    fclose(personFile);
    
    end = clock();

    // printing status information
    if (full == 0) {
        printf("Total number persons inserted: %d\n",personCount);
        printf("\n\nFillrate is: %f%%\n", (personCount/(double)MAX_PERSONS)*100.00);
        printf("Total number of collisions: %d\n", collision);
        printf("Longest probing sequece: %d\n", longest_probing_sequence);
    }

    totaltime = (double)(end-start)/CLOCKS_PER_SEC;
    printf("\n\nTime for inserting %f seconds\n", totaltime);
    
    
    free(persons);
    return 0;
}

链地址法的基本思路是:将冲突的元素放在同一个链表中,查找时只需要在对应的链表中查找即可。

首先,我们需要定义一个链表结构体,用于存储冲突的元素:

typedef struct Node {
    person_struct person;
    struct Node* next;
} Node;



然后,在定义哈希表的时候,我们需要将每个桶改为一个链表的头结点,即:

Node* hashTable[MAX_PERSONS] = {NULL};

在插入元素时,我们需要首先计算出该元素应该被放在哪个桶中,然后遍历该桶对应的链表,查看是否有与该元素冲突的元素。如果有,则将该元素插入到链表的末尾;否则,直接将该元素作为链表的头结点。


int insertPerson(person_struct *name_store,
                char *fi_name,
                char *la_name,
                char *c_code,
                char *a_code,
                char *l_number,
                int *allPersons) {
    // ...
    // compute hash for the key
    hash = count_hash(MAX_PERSONS, tempFullName, fullNameLength);

    // insert person into hash table using chain method
    Node* node = hashTable[hash];
    while (node != NULL) {
        if (strcmp(node->person.fullName, tempFullName) == 0) {
            // duplicate person, do not insert
            return 0;
        }
        node = node->next;
    }
    // create new node and insert at the beginning of the list
    Node* new_node = (Node*)malloc(sizeof(Node));
    new_node->person.first_name = tempFiName;
    new_node->person.last_name = tempLaName;
    new_node->person.fullName = tempFullName;
    new_node->person.countryCode = tempCoCode;
    new_node->person.areaCode = tempArCode;
    new_node->person.localNumber = tempLoNumber;
    new_node->next = hashTable[hash];
    hashTable[hash] = new_node;
    // ...
}

最后,在查询元素时,我们只需要先计算出该元素应该在哪个桶中,然后遍历对应的链表即可。


person_struct* searchPerson(char* fullName) {
    int hash = count_hash(MAX_PERSONS, fullName, strlen(fullName));
    Node* node = hashTable[hash];
    while (node != NULL) {
        if (strcmp(node->person.fullName, fullName) == 0) {
            return &(node->person);
        }
        node = node->next;
    }
    return NULL;
}


完整代码如下:


#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>

#define MAX_PERSONS 100000    // hash table size - experiment with different values
#define MAX_NAME_LENGTH 50
#define COUNTRY_CODE_LENGTH 4
#define AREA_CODE_LENGTH 4
#define LOCAL_NUMBER_LENGTH 8

// struct to store a persosn name and phone nummber details
typedef struct person {
    char *first_name;
    char *last_name;
    char *fullName;
    char *countryCode;
    char *areaCode;
    char *localNumber;
    struct person *next;
} person_struct;

// global variables for printing status infomation
int collision = 0;
int longest_probing_sequence = 0;

// The bad hash function! TODO: Implement a better hesh function!
int count_hash(int mod, char* fullName, int fullNameLength) {
    int hash = 0;
    for(int i=0; i < fullNameLength; i++){
        hash = (hash*71 + fullName[i])%mod;
    }
    return hash;
}

int insertPerson(    person_struct *name_store,
            char *fi_name,
            char *la_name,
            char *c_code,
            char *a_code,
            char *l_number,
            int *allPersons) {
    int i;
    int hash,firsthash;
    struct person *newPerson;

    // Allocating memory for person information
    int fiNameLength = strlen(fi_name);
    char *tempFiName = malloc(fiNameLength*sizeof(char)+1);
    int laNameLength = strlen(la_name);
    char *tempLaName = malloc(laNameLength*sizeof(char)+1);
    int coCodeLength = strlen(c_code);
    char *tempCoCode = malloc(coCodeLength*sizeof(char)+1);
    int arCodeLength = strlen(a_code);
    char *tempArCode = malloc(arCodeLength*sizeof(char)+1);
    int loNumberLength = strlen(l_number);
    char *tempLoNumber = malloc(loNumberLength*sizeof(char)+1);
    int fullNameLength = MAX_NAME_LENGTH*2;
    char *tempFullName = malloc(fullNameLength*sizeof(char)+1);

    strcpy(tempFiName, fi_name);
    strcpy(tempLaName, la_name);
    strcpy(tempCoCode, c_code);
    strcpy(tempArCode, a_code);
    strcpy(tempLoNumber, l_number);

    //concatenating first name and last name into a variable for the key
    strcpy(tempFullName, tempFiName);
    strcat(tempFullName, tempLaName);

    //changeing the key to capital letters
    for (i=0; i < fullNameLength; i++) {
        tempFullName[i] = toupper(tempFullName[i]);
    }
    tempFullName[i] = 0;

    /*printf("%s %s %s %s\n",tempFullName,tempArCode,tempArCode,tempLoNumber);*/
    // counter for number of persons stored in the hash table
    *allPersons = *allPersons+1;

    // compute hash for the key
    hash = count_hash(MAX_PERSONS, tempFullName, fullNameLength);
    //printf("%d",hash);
    // insert person into hash table
    firsthash = hash;
    int stored = 0;

    newPerson = malloc(sizeof(person_struct));
    newPerson->first_name = tempFiName;
    newPerson->last_name = tempLaName;
    newPerson->fullName = tempFullName;
    newPerson->countryCode = tempCoCode;
    newPerson->areaCode = tempArCode;
    newPerson->localNumber = tempLoNumber;
    newPerson->next = NULL;

    if (name_store[hash].fullName == NULL) {
        name_store[hash] = *newPerson;
    }
    else {
        struct person *temp = &name_store[hash];
        while (temp->next != NULL) {
            temp = temp->next;
        }
        temp->next = newPerson;
    }
    return 1;
}


// main function
int main() {
    FILE *personFile;
    // allocate memory for hash table and initialize needed variables
    person_struct *persons = (person_struct *)calloc(MAX_PERSONS+1, sizeof(person_struct));
    char firstName[MAX_NAME_LENGTH];
    char lastName[MAX_NAME_LENGTH];
    char countryCode[COUNTRY_CODE_LENGTH];
    char areaCode[AREA_CODE_LENGTH];
    char localNumber[LOCAL_NUMBER_LENGTH];
    int personCount = 0;
    double totaltime;
    clock_t start,end;
    int i;
    int full = 0;

    // get input file name from user
    char fileName[500];
    printf("Type filename:  > ");
    scanf("%s",fileName);

    personFile = fopen(fileName,"r");

    if (personFile == NULL){
        printf("NO SUCH FILE!\n");
        return 0;
    }

    char garbage[1000];

    // measure time for inserting all persons into the hash table
    start = clock();

    // reade lines while not end of file
    while (fscanf(personFile, "%[^,], %[^,], %[^,], %[^,], %[^\n]", firstName, lastName, countryCode, areaCode, localNumber) != EOF) {
        /*printf("%s ", firstName);
        printf("%s ", lastName);
        printf("%s ",countryCode);
        printf("%s ", areaCode);
        printf("%s\n", localNumber);*/
        // try to insert person into hash table
        if (insertPerson(persons, firstName, lastName, countryCode, areaCode, localNumber, &personCount) == 0){
            printf("Hash table full. File not processed completely!\n");
            full = 1;
            break;
        }

        if (fscanf(personFile, "%999[^a-zA-Z']", garbage) == EOF) {
            // End of file garbage
            break;
        }
    }

    fclose(personFile);

    end = clock();

    // printing status information
    if (full == 0) {
        printf("Total number persons inserted: %d\n",personCount);
        printf("\n\nFillrate is: %f%%\n", (personCount/(double)MAX_PERSONS)*100.00);
        printf("Total number of collisions: %d\n", collision);
        printf("Longest probing sequece: %d\n", longest_probing_sequence);
    }

    totaltime = (double)(end-start)/CLOCKS_PER_SEC;
    printf("\n\nTime for inserting %f seconds\n", totaltime);

    free(persons);

    return 0;
}