I would use `unlist(strsplit(...))` and then index the resulting vector. You can wrap this in a function so that the number of words to extract before and after the match is a flexible parameter:
#' Extract the words surrounding each occurrence of a word
#'
#' Splits `text` on single whitespace characters and, for every token that is
#' exactly equal to `look_for`, collects the `pre` tokens before it and the
#' `post` tokens after it. Positions that fall before the first token or after
#' the last one are filled with `NA`, so every match yields the same number of
#' words.
#'
#' @param text Character vector to search.
#' @param look_for Word to locate (exact, case-sensitive comparison with `==`).
#' @param pre Number of words to return before each match.
#' @param post Number of words to return after each match; defaults to `pre`.
#' @return A list with elements `before` and `after`. Each holds one column
#'   per match and one row per context word, in reading order (a plain vector
#'   when only one word of context is requested). If nothing matches, a
#'   warning "No matches" is raised instead.
getContext <- function(text, look_for, pre = 3, post = pre) {
  t_vec <- unlist(strsplit(text, "\\s"))
  matches <- which(t_vec == look_for)

  # Fetch the tokens at `offsets` relative to match position `m`.
  # Indices below 1 are set to NA: zero or negative indices would otherwise
  # drop elements (or error when mixed with positive ones) rather than mark
  # the slot as missing. Indices past the end already yield NA.
  window <- function(m, offsets) {
    idx <- m + offsets
    idx[idx < 1L] <- NA
    t_vec[idx]
  }

  if (length(matches) > 0) {
    # seq_len() keeps the windows empty when pre/post is 0, unlike `a:b`,
    # which would count downwards.
    before_offsets <- -rev(seq_len(pre))
    after_offsets <- seq_len(post)
    list(
      before = vapply(matches, window, character(pre),
                      offsets = before_offsets),
      after = vapply(matches, window, character(post),
                     offsets = after_offsets)
    )
  } else {
    warning("No matches")
  }
}
Works for one match.
# Single occurrence of the search word: each component has one column, with
# the context words listed top to bottom in reading order.
# NOTE: `text` is defined outside this excerpt; the output below is as
# recorded by the author.
getContext(text, 'Verulam')
# $before
# [,1]
# [1,] "and"
# [2,] "created"
# [3,] "Baron"
#
# $after
# [,1]
# [1,] "in"
# [2,] "1618[4]"
# [3,] "and"
Also works if there is more than one match.
# Several occurrences: one column per match, in order of appearance.
# NOTE: `text` is defined outside this excerpt; the output below is as
# recorded by the author.
getContext(text, 'he')
# $before
# [,1] [,2] [,3] [,4]
# [1,] "After" "nature." "in" "John"
# [2,] "his" "Most" "1621;[3][b]" "Aubrey"
# [3,] "death," "importantly," "as" "stating"
#
# $after
# [,1] [,2] [,3] [,4]
# [1,] "remained" "argued" "died" "contracted"
# [2,] "extremely" "this" "without" "the"
# [3,] "influential" "could" "heirs," "condition"
# No occurrence of the search word: a warning is raised instead of returning
# context.
getContext(text, 'fruitloops')
# Warning message:
# In getContext(text, "fruitloops") : No matches