I have a few questions about the assignment I need to do. It may seem that what I need to look for is to get the code, however, what I'm trying to do is learn, because after several weeks of searching for the information I lost. i m really new atc`.
Here is the purpose:
- Given the 3 files (
foo.txt, bar.txt, foo2.txt), all of them have a different number of words (I need to use dynamic memory).
Create a program that requests the word, and let it know if this word is in any of the documents (the result is the name of the document where it appears).
Example:
- Enter the word: dog
- "dog" is in the file foo.txt and bar.txt
(I think I need to upload 3 files, create a hash table with key values for each word in the documents, and also something that tells you which one is the document, where the word is).
I think I need to implement:
- A
Hash Functionthat converts a word toHashValue - A
Hash Tablein which HashValueeach word is stored (but I think I should also store the index of the document?). - Use
dynamic allocation. - Check
collisionswhile im insert values into hash table (using Quadratic Probingas well Chaining). - I also need to know how many times the word im appears in the text that it is looking for.
I was looking for hash map implementations, hash tables, quadratic probing, a hash function for strings ... but my head is confusing now and I don't know where I should start from.
so far I read:
, (scrabble)?
C /?
https://gist.github.com/tonious/1377667
-
http://www.cs.yale.edu/homes/aspnes/pinewiki/C(2f)HashTables.html?highlight=(CategoryAlgorithmNotes)
https://codereview.stackexchange.com/questions/115843/dictionary-implementation-using-hash-table-in-c
.
, .
.
- .
- -, @Shasha99
TRIE, , . - @MichaelDorgan , Hashing ( ), , Hash-, Hash- , , , .
, :
typedef struct WordMetadata {
char* Word;
int Documents[5];
int DocumentsCount;
} WordMetadata;
void InitTable (WordMetadata **Table) {
Table = (WordMetadata**) malloc (sizeof(WordMetadata) * TABLESIZE);
for (int i = 0; i < TABLESIZE; i++) {
Table[i] = (WordMetadata*) NULL;
}
}
int Hash (char *WordParam) {
for (int i = 0; *WordParam != '\0';) {
i += *WordParam++;
}
return (i % TABLESIZE);}
2
- , , , ( , )
3
, (, , ), -, .
85% (~ 200 ) .
- ramdom, , , , , ...
( ) :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TABLESIZE 4001
#define LINESIZE 2048
#define DELIMITER " \t"
typedef struct TTable {
char* Word;
int Documents[5];
int DocumentsCount;
} TTable;
int Hash (char *Word);
void Index (TTable **HashTable, char* Word, int DocumentIndex);
int Search (TTable **HashTable, char* Word);
int mystrcmp(char *s1, char *s2);
char* Documents[] = {"foo.txt","bar.txt","foo2.txt",NULL};
int main() {
FILE* file;
TTable **HashTable
int DocumentIndex;
char Line[LINESIZE];
char* Word;
char* Tmp;
HashTable = (TTable**) malloc (sizeof(TTable)*TABLESIZE);
for (int i = 0; i < TABLESIZE; i++) {
HashTable[i] = (TTable*) NULL;
}
for (DocumentIndex = 0; Documents[DocumentIndex] != NULL; DocumentIndex++) {
file = fopen(Documents[DocumentIndex],"r");
if (file == NULL) {
fprintf(stderr, "Error%s\n", Documents[DocumentIndex]);
continue;
}
while (fgets (Line,LINESIZE,file) != NULL) {
Line[LINESIZE-1] = '\0';
Tmp = strtok (Line,DELIMITER);
do {
Word = (char*) malloc (strlen(Tmp)+1);
strcpy(Word,Tmp);
Index(HashTable,Word,DocumentIndex);
Tmp = strtok(NULL,DELIMITER);
} while (Tmp != NULL);
}
fclose(file);
}
printf("Enter the word:");
fgets(Line,100,stdin);
Line[strlen(Line)-1]='\0';
int i = Search(HashTable,Line);
if (i != -1) {
for (int j = 0; j < HashTable[i]->DocumentsCount; j++) {
printf("%s\n", Documents[HashTable[i]->Documents[j]]);
if ( j < HashTable[i]->DocumentsCount-1) {
printf(",");
}
}
}
else {
printf("Cant find word\n");
}
for (i = 0; i < TABLESIZE; i++) {
if (HashTable[i] != NULL) {
free(HashTable[i]->Word);
free(HashTable[i]);
}
}
return 0;
}
int Search (TTable **HashTable, char* Word) {
int Aux = Hash(Word);
int OldPosition,ActualPosition;
ActualPosition = -1;
for (int i = 0; i < TABLESIZE; i++) {
OldPosition = ActualPosition;
ActualPosition = (Aux + i*i) % TABLESIZE;
if (HashTable[ActualPosition] == NULL) {
return -1;
}
if (strcmp(Word,HashTable[ActualPosition]->Word) == 0) {
return ActualPosition;
}
}
return -1;
}
void Index (TTable **HashTable, char* Word, int DocumentIndex) {
int Aux;
int OldPosition, ActualPosition;
if ((ActualPosition = Search(HashTable,Word)) != -1) {
for (int j = 0; j < HashTable[ActualPosition]->DocumentsCount;j++) {
if(HashTable[ActualPosition]->Documents[j] == DocumentIndex) {
return;
}
}
HashTable[ActualPosition]->Documents[HashTable[ActualPosition]->DocumentsCount] = DocumentIndex; HashTable[ActualPosition]->DocumentsCount++;
return;
}
ActualPosition = -1;
Aux = Hash(Word);
for (int i = 0; i < TABLESIZE; i++) {
OldPosition = ActualPosition;
ActualPosition = (Aux + i*i) % TABLESIZE;
if (OldPosition == ActualPosition) {
break;
}
if (HashTable[ActualPosition] == NULL) {
HashTable[ActualPosition] = (TTable*)malloc (sizeof(TTable));
HashTable[ActualPosition]->Word = Word;
HashTable[ActualPosition]->Documents[0] = DocumentIndex;
HashTable[ActualPosition]->DocumentsCount = 1;
return;
}
}
printf("No more free space\n");
}
int Hash (char *Word) {
int HashValue;
for (HashValue = 0; *Word != '\0';) {
HashValue += *Word++;
}
return (HashValue % TABLESIZE);
}