In C, the filter is a callback that also receives a user-data pointer; for a stateless filter such as not_wspc, pass 0 as filterdata:
int not_wspc(void *, char c) {
if isspace((unsigned char)c) return 0;
if ((c == '.') || (c == ',')) return 0;
return 1;
}
typedef struct {
char c;
int pos;
} charwithpos;
KGram *foo(const char *input, int (*filter)(void *,char), void *filterdata) {
size_t len = strlen(input);
charwithpos *filtered = malloc(len * sizeof(*filtered));
assert(filtered);
charwithpos *current = filtered
for (size_t i = 0; i < len; ++i) {
if (filter(filterdata, input[i])) {
current->c = input[i];
current->pos = i;
++current;
}
}
size_t shortlen = (current - filtered);
KGram *result = malloc((shortlen / 5 + 1) * sizeof(*result));
assert(result);
KGram *currentgram = result;
current = filtered;
for (size_t i = 0; i < shortlen; ++i) {
currentgram->text[i%5] = current->c;
if ((i % 5) == 0) {
currentgram->start = current->pos;
} else if ((i % 5) == 4) {
currentgram->end = current->pos;
++currentgram;
}
++current;
}
if (shortlen % 5) != 0 {
currentgram->end = filtered[shortlen-1].pos;
currentgram->text[shortlen%5] = 0;
}
free(filtered);
return(result);
}
- , . , , filtered , , . , , . , C, , .
++-, <functional>. , new: , , , ; -)
/// Helper holding an output iterator together with one KGram value.
///
/// `add` and `flush` are only declared here; their definitions are not part
/// of this struct body.
/// NOTE(review): presumably `add` accumulates (character, position) pairs
/// into `kgram` and `flush` writes a pending `kgram` to `dest` - confirm
/// against the definitions.
template <typename OutputIterator>
struct KGramOutput {
    OutputIterator dest; ///< output iterator supplied at construction
    KGram kgram;         ///< KGram value held by this helper

    /// Stores a copy of `out` in `dest`; `kgram` is default-initialized.
    KGramOutput(OutputIterator out) : dest(out) {}

    void add(char, size_t);
    void flush();
};
template <typename InputIterator, typename OutputIterator, typename Filter>
void foo(InputIterator first, InputIterator last, OutputIterator dest, Filter filter) {
size_t i = 0;
KGramOutput<OutputIterator> kgram(dest);
while (first != last) {
if (filter(*first)) kgram.add(*first, i);
++first;
++i;
}
kgram.flush();
}
The functions add and flush are a little tedious: they have to group 5 (character, position) pairs into a KGram structure and then do *dest++ = kgram. The user could pass, for example, a back_insert_iterator over a vector<KGram> as the output iterator. By the way, '5' and 'char' could also be template parameters.