Partial intersection of elements over vectors in two lists

I have a list like this:

# Named list of character vectors whose values are checked against matchlist.
mylist <- list(
  PP = c("PP 1", "OMITTED"),
  IN01 = c("DID NOT PARTICIPATE", "PARTICIPATED", "OMITTED"),
  RD1 = c("YES", "NO", "NOT REACHED", "INVALID", "OMITTED"),
  RD2 = c("YES", "NO", "NOT REACHED", "NOT AN OPTION", "OMITTED"),
  LOS = c(
    "LESS THAN 3", "3 TO 100", "100 TO 500", "MORE THAN 500",
    "LOGICALLY NOT APPLICABLE", "OMITTED"
  ),
  COM = c("BAN", "SBAN", "RAL"),
  VR1 = c("WITHIN 30", "WITHIN 200", "NOT AVAILABLE", "OMITTED"),
  INF = c("A LOT", "SOME", "LITTLE OR NO", "NOT APPLICABLE", "OMITTED"),
  IST = c(
    "FULL-TIME", "PART-TIME", "FULL STAFFED", "NOT STAFFED",
    "LOGICALLY NOT APPLICABLE", "OMITTED"
  ),
  CMP = c(
    "ALL", "MOST", "SOME", "NONE",
    "LOGICALLY NOT APPLICABLE", "OMITTED"
  )
)

I have the following list:

# Unnamed list of reference vectors; one element per line for readability.
matchlist <- list(
  "INVALID",
  c("INVALID", "OMITTED OR INVALID"),
  c("INVALID", "OMITTED"),
  "OMITTED",
  c("NOT REACHED", "INVALID", "OMITTED"),
  c("LOGICALLY NOT APPLICABLE", "INVALID", "OMITTED"),
  c("LOGICALLY NOT APPLICABLE", "INVALID", "OMITTED OR INVALID"),
  c("Not applicable", "Not stated"),
  c(
    "Not reached",
    "Not administered/missing by design",
    "Presented but not answered/invalid"
  ),
  c(
    "Not administered/missing by design",
    "Presented but not answered/invalid"
  ),
  "OMITTED OR INVALID",
  c("LOGICALLY NOT APPLICABLE", "OMITTED OR INVALID"),
  c("NOT REACHED", "OMITTED"),
  c("NOT APPLICABLE", "OMITTED"),
  c("LOGICALLY NOT APPLICABLE", "OMITTED"),
  c("LOGICALLY NOT APPLICABLE", "NOT REACHED", "OMITTED"),
  "NOT EXCLUDED",
  c("Default", "Not applicable", "Not stated"),
  c(
    "Valid Skip", "Not Reached", "Not Applicable",
    "Invalid", "No Response"
  ),
  c("Not administered", "Omitted"),
  c("NOT REACHED", "INVALID RESPONSE", "OMITTED"),
  c("INVALID RESPONSE", "OMITTED")
)

As you can see, some of the vectors in matchlist partially correspond to the vectors in mylist. In some cases, a vector in matchlist corresponds exactly to a part of a vector in mylist. For example, the last three values of RD1 in mylist correspond to the vector in the fifth component of matchlist, but those of RD2 do not, although there are common values. The values in RD2 in mylist ("NOT REACHED", "NOT AN OPTION", "OMITTED"), together and in this order, do not correspond to any of the vectors in matchlist. The same goes for the values of COM in mylist.

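What I want is to compare each vector in mylist against each vector in matchlist and, for every vector in mylist, extract the values that correspond exactly, together and in the same order, to one of the vectors in matchlist. The desired result looks like this: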

$PP
[1] "OMITTED"

$IN01
[1] "OMITTED"

$RD1
[1] "NOT REACHED" "INVALID" "OMITTED"

$RD2
character(0)

$LOS
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"

$COM
character(0)

$VR1
[1] "OMITTED"

$INF
[1] "NOT APPLICABLE" "OMITTED"

$IST
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"

$CMP
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"

What I have tried so far:

Using intersect:

# Attempt: intersect each vector of mylist with matchlist as a whole.
# The inner lapply() returns every element of matchlist unchanged, so the
# second argument to intersect() is a list of vectors, not a flat vector.
lapply(mylist, function(i) {
  intersect(i, lapply(matchlist, function(i) {i}))
})

matchlist ( "OMITTED" ).

Using match via %in%:

# Attempt: keep the elements of each vector that %in% reports as present in
# matchlist. matchlist is a list, so each value is tested against whole list
# elements rather than against the individual strings inside them.
# NOTE(review): this relies on how match() coerces a list table -- confirm.
lapply(mylist, function(i) {
  i[which(i %in% matchlist)]
})

RD1 ( "INVALID", "OMITTED" ), ( "OMITTED" ), COM, .

Using mapply with intersect:

# Attempt: mapply() walks mylist and matchlist in parallel, so each vector of
# mylist is intersected with the single matchlist element at the same
# position instead of being compared against all of them.
mapply(intersect, mylist, matchlist)

, , , .

- , ?

+4
4

/ , unlist. , matchlist, . , , double-lapply, :

# For every vector in mylist, intersect it with each vector in matchlist and
# keep the largest intersection (the first one on ties). An empty matchlist
# yields character(0).
out <- lapply(mylist, function(candidate) {
  overlaps <- lapply(matchlist, function(ref) intersect(ref, candidate))
  best <- which.max(lengths(overlaps))
  if (length(best) == 0L) {
    return(character(0))
  }
  overlaps[[best]]
})
str(out)
# List of 9
#  $ PP  : chr "OMITTED"
#  $ IN01: chr "OMITTED"
#  $ RD1 : chr [1:3] "NOT REACHED" "INVALID" "OMITTED"
#  $ LOS : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"
#  $ COM : chr(0) 
#  $ VR1 : chr "OMITTED"
#  $ INF : chr [1:2] "NOT APPLICABLE" "OMITTED"
#  $ IST : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"
#  $ CMP : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"

, (-) , , . ( , : which.max ? , , .)

UPDATE

, matchlist, , , . , , mylist$RD1 "BLAH", matchlist[[5]].

(, , ), , . *apply...

(NB: , $RD1 character(0), "INVALID", matchlist, , .)

# For every vector in mylist, return the longest vector of matchlist that
# occurs inside it as a contiguous run in the same order (the first one on
# ties), or character(0) when no vector of matchlist occurs that way.
out <- lapply(mylist, function(this) {
  # Length of `candidate` if it occurs as a contiguous run in `this`, else 0.
  run_length <- function(candidate) {
    n <- length(candidate)
    starts <- which(this == candidate[1])
    # Drop start positions whose window would run past the end of `this`:
    # indexing beyond the end yields NA, which would turn the comparison (and
    # ultimately the `if` condition below) into NA.
    starts <- starts[starts + n - 1L <= length(this)]
    found <- vapply(
      starts,
      function(i) isTRUE(all(candidate == this[i + seq_len(n) - 1L])),
      logical(1)
    )
    if (any(found)) n else 0L
  }
  perfectmatches <- vapply(matchlist, run_length, integer(1))
  # Compare first, then reduce: any(x > 0), not any(x) > 0.
  if (any(perfectmatches > 0)) {
    matchlist[[which.max(perfectmatches)]]
  } else {
    character(0)
  }
})
str(out)
# List of 9
#  $ PP  : chr "OMITTED"
#  $ IN01: chr "OMITTED"
#  $ RD1 : chr "INVALID"
#  $ LOS : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"
#  $ COM : chr(0) 
#  $ VR1 : chr "OMITTED"
#  $ INF : chr [1:2] "NOT APPLICABLE" "OMITTED"
#  $ IST : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"
#  $ CMP : chr [1:2] "LOGICALLY NOT APPLICABLE" "OMITTED"
+1

, unlist matchlist:

# Keep, for every vector in mylist, the values that occur anywhere in
# matchlist (order and duplicates of the original vector are preserved).
lapply(mylist, function(vals) {
  keep <- is.element(vals, unlist(matchlist))
  vals[keep]
})

( ):

$PP
[1] "OMITTED"

$IN01
[1] "OMITTED"

$RD1
[1] "NOT REACHED" "INVALID"     "OMITTED"    

$LOS
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"                 

$COM
character(0)

$VR1
[1] "OMITTED"

$INF
[1] "NOT APPLICABLE" "OMITTED"       

$IST
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"                 

$CMP
[1] "LOGICALLY NOT APPLICABLE" "OMITTED"                 
+4

# Intersect every vector in mylist with the flattened contents of matchlist.
lapply(mylist, function(vals) {
  intersect(vals, unlist(matchlist))
})

.

+3
# For every vector in mylist, keep each value x for which the anchored
# pattern "^x$" matches at least one element of matchlist; values without a
# match yield NULL and are dropped by unlist().
# The value is pasted into the pattern as-is, so regex metacharacters in x
# are interpreted as regex syntax rather than literally.
# NOTE(review): matchlist is a list, so grepl() sees its character coercion;
# multi-element vectors are presumably not matched element by element --
# confirm this is the intended comparison.
lapply(mylist, function(i) {
  unlist(sapply(i,function(x){if(any(grepl(paste0("^",x,"$"),matchlist))){x}}))
})

"\ b" - "", "NOT". grepl - , :)

+2

Source: https://habr.com/ru/post/1694869/


All Articles