- ~ 50 . , , , .
, , .
. , . , , . .
, 1,2,.., n , . , .
, , , .
, whet, , :
HashSet<Long> foundHashes = new HashSet<Long>();
LinkedList<String> words = new LinkedList<String>();
for(int i=0; i<params.maxPhrase; i++) words.addLast("");
StandardTokenizer st = new StandardTokenizer(new StringReader(docText));
Token t = new Token();
while(st.next(t) != null) {
String token = new String(t.termBuffer(), 0, t.termLength());
words.addLast(token);
words.removeFirst();
for(int len=params.minPhrase; len<params.maxPhrase; len++) {
String term = Utils.join(new ArrayList<String>(words.subList(params.maxPhrase-len,params.maxPhrase)), " ");
long hash = Utils.longHash(term);
if(params.lexicon.isTermHash(hash)) {
foundHashes.add(hash);
}
}
}
for(long hash : foundHashes) {
if(count.containsKey(hash)) {
count.put(hash, count.get(hash) + 1);
} else {
count.put(hash, 1);
}
}