Implementing Quadratic Probing and Chaining - Dictionary Search

I have a few questions about an assignment I need to do. It may look as if I am just asking for the code, but what I really want is to learn, because after several weeks of searching for information I am lost. I'm really new at C.

Here is the purpose:

  • Given the 3 files ( foo.txt, bar.txt, foo2.txt), all of them have a different number of words (I need to use dynamic memory).

Create a program that asks for a word and reports whether that word appears in any of the documents (the result is the name of each document in which it appears).

Example:

  • Enter the word: dog
  • "dog" is in the file foo.txt and bar.txt

(I think I need to load the 3 files, create a hash table with a key value for each word in the documents, and also store something that tells me which document each word is in.)

I think I need to implement:

  • A hash function that converts a word to a hash value.
  • A hash table in which the hash value of each word is stored (but I think I should also store the index of the document?).
  • Use dynamic allocation.
  • Check for collisions while I insert values into the hash table (using quadratic probing as well as chaining).
  • I also need to know how many times the word I am looking for appears in the text.

I have been looking at hash map implementations, hash tables, quadratic probing, hash functions for strings ... but now I am confused and I don't know where I should start.

so far I read:

, (scrabble)?

C /?

https://gist.github.com/tonious/1377667

-

http://www.cs.yale.edu/homes/aspnes/pinewiki/C(2f)HashTables.html?highlight=(CategoryAlgorithmNotes)

https://codereview.stackexchange.com/questions/115843/dictionary-implementation-using-hash-table-in-c

.

, .

.

  • .
  • -, @Shasha99 TRIE, , .
  • @MichaelDorgan , Hashing ( ), , Hash-, Hash- , , , .

, :

  • , , , , .
    /* Per-word record: the word itself plus the documents it occurs in. */
    typedef struct WordMetadata {
        char *Word;          /* the word text */
        int Documents[5];    /* indices of the documents containing the word */
        int DocumentsCount;  /* how many slots of Documents are in use */
    } WordMetadata;
  • ,
       /*
        * Allocates a table of TABLESIZE entries and sets every entry to NULL.
        *
        * NOTE(review): Table is passed by value, so the assignment below only
        * changes this function's local copy; the caller's pointer is left
        * untouched and the allocation is not reachable after return. Fixing
        * this needs a signature change (return the pointer, or take a
        * WordMetadata ***).
        */
       void InitTable (WordMetadata **Table) {
            /* NOTE(review): the elements are pointers, yet the size is computed
               from sizeof(WordMetadata); sizeof(WordMetadata*) looks intended. */
            Table = (WordMetadata**) malloc (sizeof(WordMetadata) * TABLESIZE);
            /* Mark every slot as empty. The malloc result is not checked. */
            for (int i = 0; i < TABLESIZE; i++) {
                Table[i] = (WordMetadata*) NULL;
            }
        }
 /*
  * Additive string hash: sums the bytes of WordParam and reduces the sum
  * modulo TABLESIZE.
  *
  * WordParam must be a NUL-terminated string.
  * Returns a slot index in the range [0, TABLESIZE).
  */
 int Hash (char *WordParam) {

            /* The accumulator lives outside the loop so it is still in scope
               for the return statement. Bytes are read as unsigned char so a
               platform with signed char cannot yield a negative sum. */
            unsigned int Sum = 0;

            for (const unsigned char *p = (const unsigned char *) WordParam; *p != '\0'; p++) {

                Sum += *p;

            }

            return (int) (Sum % TABLESIZE);}

2

- , , , ( , )

3

, (, , ), -, .

  • 85% (~ 200 ) .

  • - ramdom, , , , , ...

( ) :

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TABLESIZE 4001
#define LINESIZE 2048
#define DELIMITER " \t"

/* One occupied slot of the hash table: a word and the documents it was seen in. */
typedef struct TTable {
    char *Word;          /* the indexed word */
    int Documents[5];    /* indices of the documents that contain the word */
    int DocumentsCount;  /* number of valid entries in Documents */
} TTable;


/* Forward declarations for the functions defined after main. */
int Hash (char *Word);
void Index (TTable **HashTable, char* Word, int DocumentIndex);
int Search (TTable **HashTable, char* Word);
/* NOTE(review): no definition or call of mystrcmp is visible in this section;
   confirm whether this prototype is still needed. */
int mystrcmp(char *s1, char *s2);
/* Names of the files to index; the trailing NULL marks the end of the list. */
char* Documents[] = {"foo.txt","bar.txt","foo2.txt",NULL};


int main() {

    FILE* file;
    TTable **HashTable
    int DocumentIndex;
    char Line[LINESIZE];
    char* Word;
    char* Tmp;

    HashTable = (TTable**) malloc (sizeof(TTable)*TABLESIZE);
    for (int i = 0; i < TABLESIZE; i++) {
      HashTable[i] = (TTable*) NULL;
    }

    for (DocumentIndex = 0; Documents[DocumentIndex] != NULL; DocumentIndex++) {

      file = fopen(Documents[DocumentIndex],"r");
      if (file == NULL) {

          fprintf(stderr, "Error%s\n", Documents[DocumentIndex]);
          continue;

      }


      while (fgets (Line,LINESIZE,file) != NULL) {

          Line[LINESIZE-1] = '\0';
          Tmp = strtok (Line,DELIMITER);

          do {

              Word = (char*) malloc (strlen(Tmp)+1);
              strcpy(Word,Tmp);
              Index(HashTable,Word,DocumentIndex);
              Tmp = strtok(NULL,DELIMITER);
          } while (Tmp != NULL);

      }

        fclose(file);

    }


        printf("Enter the word:");
        fgets(Line,100,stdin);
        Line[strlen(Line)-1]='\0'; //fgets stores newline as well. so removing newline.
        int i = Search(HashTable,Line);
        if (i != -1) {
          for (int j = 0; j < HashTable[i]->DocumentsCount; j++) {
            printf("%s\n", Documents[HashTable[i]->Documents[j]]);
            if ( j < HashTable[i]->DocumentsCount-1) {

                printf(",");
            }
          }
        }

        else {
          printf("Cant find word\n");
        }


        for (i = 0; i < TABLESIZE; i++) {
          if (HashTable[i] != NULL) {

              free(HashTable[i]->Word);
              free(HashTable[i]);

          }
        }


return 0;
}

/*
 * Looks Word up in HashTable using quadratic probing.
 *
 * Starting from Hash(Word), slot (hash + i*i) % TABLESIZE is examined for
 * i = 0, 1, 2, ... The search stops at the first empty slot (word absent) or
 * at the first slot whose stored word equals Word.
 *
 * Returns the slot index of the word, or -1 if it is not in the table.
 *
 * Theorem: if the table size is prime and the load factor is below 0.5,
 * quadratic probing will always find an empty slot.
 */
int Search (TTable **HashTable, char* Word) {

    const int Home = Hash(Word);
    int Step = 0;

    while (Step < TABLESIZE) {
        int Slot = (Home + Step*Step) % TABLESIZE;
        TTable *Entry = HashTable[Slot];

        /* An empty slot ends the probe sequence: the word was never stored. */
        if (Entry == NULL) {
            return -1;
        }

        if (strcmp(Word, Entry->Word) == 0) {
            return Slot;
        }

        Step++;
    }

    return -1; /* every probe hit a different word */
}


/*
 * Records that Word occurs in document DocumentIndex.
 *
 * HashTable     - table of TABLESIZE entry pointers (NULL means empty slot).
 * Word          - heap-allocated, NUL-terminated word. Ownership passes to
 *                 this function: the pointer is stored in a new entry, or
 *                 freed when no new entry is created. The caller must not
 *                 use or free it afterwards.
 * DocumentIndex - index of the document in which the word was seen.
 *
 * If the word is already present, DocumentIndex is appended to its document
 * list unless it is already there or the list is full. Otherwise a new entry
 * is placed in the first free slot of the quadratic probe sequence.
 */
void Index (TTable **HashTable, char* Word, int DocumentIndex) {

    int Position = Search(HashTable, Word);

    if (Position != -1) {

        TTable *Entry = HashTable[Position];
        const int Capacity = (int) (sizeof Entry->Documents / sizeof Entry->Documents[0]);

        /* The table already holds its own copy of this word, so the
           caller's copy is released here rather than leaked. */
        if (Word != Entry->Word) {
            free(Word);
        }

        for (int j = 0; j < Entry->DocumentsCount; j++) {
            if (Entry->Documents[j] == DocumentIndex) {
                return;   /* document already recorded for this word */
            }
        }

        /* Never write past the end of the fixed-size document list. */
        if (Entry->DocumentsCount < Capacity) {
            Entry->Documents[Entry->DocumentsCount] = DocumentIndex;
            Entry->DocumentsCount++;
        }
        return;
    }

    int Aux = Hash(Word); /* hash value: start of the probe sequence */

    for (int i = 0; i < TABLESIZE; i++) {

        Position = (Aux + i*i) % TABLESIZE;

        if (HashTable[Position] == NULL) {

            TTable *Entry = malloc (sizeof *Entry);
            if (Entry == NULL) {
                fprintf(stderr, "Out of memory\n");
                free(Word);
                return;
            }
            Entry->Word = Word;
            Entry->Documents[0] = DocumentIndex;
            Entry->DocumentsCount = 1;
            HashTable[Position] = Entry;
            return;
        }
    }

    printf("No more free space\n");
    free(Word);   /* the word could not be stored, so do not leak it */

}


/*
 * Additive string hash: sums the bytes of Word and reduces the sum modulo
 * TABLESIZE.
 *
 * Word must be a NUL-terminated string.
 * Returns a slot index in the range [0, TABLESIZE).
 */
int Hash (char *Word) {

    /* Bytes are read as unsigned char and summed in an unsigned accumulator.
       With plain char (which may be signed) a non-ASCII byte would make the
       sum, and therefore the returned table index, negative. */
    unsigned int HashValue = 0;

    for (const unsigned char *p = (const unsigned char *) Word; *p != '\0'; p++) {
      HashValue += *p;
    }

    return (int) (HashValue % TABLESIZE);
}
+4
2

TRIE , , . _, :

  • TRIE, node . 3 1, 3 node .
  • , TRIE node.

TRIE, , , O (1).


Hash Tables, :



EDIT 2:

!!!

:

gets(), fgets(). , :

gets(Line);

:

fgets(Line,100,stdin);
Line[strlen(Line)-1]='\0'; //fgets stores newline as well. so removing newline.

:

if ( j < HashTable[j]->DocumentsCount-1){

. , HashTable[i]:

if ( j < HashTable[i]->DocumentsCount-1){

:

HashTable[ActualPosition]->Documents[HashTable[ActualPosition]->DocumentsCount];

. :

HashTable[ActualPosition]->Documents[HashTable[ActualPosition]->DocumentsCount] = DocumentIndex;


Malloc void. :

HashTable[ActualPosition] = (TTable*)malloc (sizeof(TTable));

Documents node Hash:

for(j=0;j<5;j++)HashTable[ActualPosition]->Documents[j]=-1;


HashTable , . , while.

while while(1) - , .

!!!

+1

, , . :

  • 3 char ( char **, ), .
  • /, , . , , , .. , .
  • char * char ** . ( 100 , num_words = 100; fooPtr = malloc(sizeof(char *) * num_words);
  • . " 2D-" .

3 .

for char. , . , , .

:

  • , (O (log n)).
  • - O (1) ( , , .)

, . .

- - - , , , - .

0

Source: https://habr.com/ru/post/1660837/


All Articles