An approach that extends the accepted answer to also handle split contractions such as "ca" + "n't":
/** Adds [[SNLPConcat]] to a list of tokens so that split contractions can be re-joined.
  *
  * @param words the tokens to process, in sentence order
  */
implicit class StanfordNLPConcat(val words: List[String]) extends AnyVal {

  /** Glues every token that contains an apostrophe onto the token before it.
    *
    * For example `List("I", "'d")` becomes `List("I'd")` and
    * `List("ca", "n't")` becomes `List("can't")`. A leading token that contains an
    * apostrophe has nothing to attach to and is kept as its own element.
    *
    * Unlike a join-then-split approach, no separator character is involved, so
    * tokens may contain any character (including `#`), empty tokens are preserved,
    * and an empty input yields an empty list instead of throwing.
    *
    * @return the tokens with apostrophe-bearing tokens merged into their predecessor
    */
  def SNLPConcat(): List[String] =
    words
      .foldLeft(List.empty[String]) {
        // Apostrophe token with a predecessor: append it to that predecessor.
        case (previous :: earlier, token) if token.contains("'") =>
          (previous + token) :: earlier
        // Ordinary token, or an apostrophe token at the very start of the input.
        case (merged, token) =>
          token :: merged
      }
      .reverse // the accumulator is built by prepending, so restore input order
}
Let
val words = List("I", "'d", "like", "to", "fix", "this", "sentence", "because", "it", "'s", "broken")
and therefore
words.SNLPConcat()
res: List[String] = List(I'd, like, to, fix, this, sentence, because, it's, broken)
Further, for a contraction that is split inside the word:
List("It", "ca", "n't", "be", "wrong").SNLPConcat()
res: List[String] = List(It, can't, be, wrong)
source
share