Get token index from Stanford CoreNLP

I want to get the token index using CoreNLP. I can get all token annotations like POS, NER, but the index returns null. Here is the code:

import edu.stanford.nlp.ling.CoreAnnotations.IndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentenceIndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import java.util.List;
import java.util.Properties;

/** Pipeline used for annotation; has no initializer, so it is null until getPipeline() assigns it. */
protected StanfordCoreNLP pipeline;

/**
 * Builds a StanfordCoreNLP pipeline and stores it in the {@code pipeline} field.
 *
 * <p>Annotators requested: tokenize, ssplit, pos, lemma, parse.
 */
private void getPipeline() {
    final Properties config = new Properties();
    config.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
    pipeline = new StanfordCoreNLP(config);
}

/**
 * Wraps {@code text} in an {@link Annotation} and passes it through the pipeline.
 *
 * <p>Uses the {@code pipeline} field, which is only assigned by {@code getPipeline()};
 * calling this before that assignment dereferences a null field.
 *
 * @param text raw text to annotate
 * @return the same Annotation object that was handed to the pipeline
 */
private Annotation annotatedocument(String text) {
    final Annotation annotated = new Annotation(text);
    // Every annotator configured on the pipeline is applied to this object.
    pipeline.annotate(annotated);
    return annotated;
}

/**
 * Annotates {@code sentence} and reads the text, token index and part-of-speech tag
 * of every token in the result.
 *
 * <p>The input is expected to split into exactly one sentence. This is checked with an
 * {@code assert}, so it is only enforced when assertions are enabled ({@code -ea}).
 * The values read per token are not used further in this method.
 *
 * @param sentence raw text to annotate
 */
private void getAnnotation(String sentence) {
    Annotation annotation = annotatedocument(sentence);
    List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
    int numSent = sentences.size();
    assert (numSent == 1) : "Number of sentences in annotation  is > 1: " + numSent;

    // The loop variable must not be called 'sentence': a local variable cannot
    // redeclare a method parameter of the same name, so that would not compile.
    for (CoreMap sentenceMap : sentences) {
        List<CoreLabel> tokens = sentenceMap.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            String word = token.get(TextAnnotation.class);
            // Boxed Integer, so a missing IndexAnnotation shows up as null here.
            Integer index = token.get(IndexAnnotation.class);
            String pos = token.getString(PartOfSpeechAnnotation.class);
        }
    }
}

I get the word and POS correctly, but the index is null.

+4
source share

Source: https://habr.com/ru/post/1608238/


All Articles