:
, , : . , , . , , . ?
, , :
NB: 0,4, .
, , tidyverse, , , , .
library(tm)
library(lsa)
library(tidyverse)
#' Pairwise cosine similarity between the strings of a character vector
#'
#' Every element of `corpus` is treated as one document. The documents are
#' converted to a document-term matrix (punctuation removed, terms of any
#' length kept, raw term-frequency weighting) and the cosine similarity is
#' computed for every pair of rows of that matrix.
#'
#' @param corpus Character vector, one document per element.
#' @return A square numeric matrix with one row and one column per document.
#'   Row names are the original strings, column names are the row names of
#'   the document-term matrix (the document ids), and the diagonal is forced
#'   to 0 so that a document is never reported as similar to itself. A
#'   document that contributes no terms has a zero norm, so its off-diagonal
#'   entries are `NaN`.
get_cos_sim <- function(corpus) {
  doc <- tm::VCorpus(tm::VectorSource(corpus))

  # The control list must contain only named options; the corpus itself does
  # not belong in it.
  tfm <- as.matrix(
    tm::DocumentTermMatrix(
      doc,
      control = list(
        removePunctuation = TRUE,
        wordLengths = c(1, Inf),
        weighting = tm::weightTf
      )
    )
  )

  # Cosine similarity for all pairs at once: dot(i, j) / sqrt(|i|^2 * |j|^2).
  # This is the same arithmetic as computing the cosine pair by pair, without
  # growing a matrix inside a loop.
  sq_norm <- rowSums(tfm^2)
  sim <- tcrossprod(tfm) / sqrt(outer(sq_norm, sq_norm))

  diag(sim) <- 0
  dimnames(sim) <- list(as.character(corpus), rownames(tfm))
  sim
}
# Example input: six short sentences, some of which share vocabulary.
strings <- c(
  "Dan is a good man and very smart",
  "A good man is rare",
  "Alex can be trusted with anything",
  "Dan likes to share his food",
  "Rare are man who can be trusted",
  "Please share food"
)

# Logical matrix: TRUE where the cosine similarity of a pair of sentences
# is strictly greater than 0.4.
sim <- get_cos_sim(strings) > 0.4
! , Chalermsook Chuzhoy: Maximum Independent Rectangles, igraph. , ,
library(igraph)
# Turn the logical similarity matrix into an undirected graph and list its
# maximal cliques. Each TRUE cell becomes an edge between the row position
# (`from`) and the column name (`to`).
# `gather()` is kept on purpose: it emits rows column by column, and the
# resulting edge order determines the vertex order seen by `max_cliques()`.
cliques <- sim %>%
  tibble::as_tibble() %>%
  mutate(from = row_number()) %>%
  gather(key = "to", value = "edge", -from) %>%
  filter(edge) %>%
  graph_from_data_frame(directed = FALSE) %>%
  max_cliques()
vertices longes . Caveat:, , . . igraph , , , - -
# Numeric ids of every string that appears in at least one clique
# (vertex names are the ids stored as text).
string_cliques_index <- as.numeric(names(unlist(cliques)))

# Numeric ids of the strings that are in no clique at all.
string_uniques_index <- as.numeric(
  colnames(sim)[!(colnames(sim) %in% string_cliques_index)]
)

# One list entry per clique (vector of member ids), followed by one entry
# per stand-alone string id.
all_distict <- c(
  lapply(cliques, function(clique) as.numeric(names(clique))),
  string_uniques_index
)

# Apply `find_longest` to every group of ids, passing `strings` as the
# extra argument.
lapply(X = all_distict, FUN = find_longest, strings)
:
:
# Extended example input: the six original sentences plus six more.
strings <- c(
  # original six
  "Dan is a good man and very smart",
  "A good man is rare",
  "Alex can be trusted with anything",
  "Dan likes to share his food",
  "Rare are man who can be trusted",
  "Please share food",
  # additional six (note: the "Mangoes" sentence ends with a space)
  "NASA is a government organisation",
  "The FBI organisation is part of the government of USA",
  "Hurricanes are a tragedy",
  "Mangoes are very tasty to eat ",
  "I like to eat tasty food",
  "The thief was caught by the FBI"
)
:
Dan is a good man and very smart FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
A good man is rare TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Alex can be trusted with anything FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Dan likes to share his food FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
Rare are man who can be trusted FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Please share food FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
NASA is a government organisation FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
The FBI organisation is part of the government of USA FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
Hurricanes are a tragedy FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Mangoes are very tasty to eat FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
I like to eat tasty food FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
The thief was caught by the FBI FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
, :
Dan is a good man and very smart
Alex can be trusted with anything
Dan likes to share his food
NASA is a government organisation
The FBI organisation is part of the government of USA
Hurricanes are a tragedy
Mangoes are very tasty to eat
A good man is rare
Rare are man who can be trusted
Please share food
I like to eat tasty food
The thief was caught by the FBI
, .
, ,
[[1]]
[1] "The FBI organisation is part of the government of USA"
[[2]]
[1] "Dan is a good man and very smart"
[[3]]
[1] "Alex can be trusted with anything"
[[4]]
[1] "Dan likes to share his food"
[[5]]
[1] "Mangoes are very tasty to eat "
[[6]]
[1] "NASA is a government organisation"
[[7]]
[1] "Hurricanes are a tragedy"
!
, , , , .