Iterate over columns in a data frame to replace values from the corresponding data in the data frame list

Question

Iterate over columns in a data frame to replace values from the corresponding data in the data frame list

I am interested in creating a function using `apply`/`sapply` or `Map` that will iterate over the available columns in `dta` and replace the values in each column with the matching values from the data frame available in the unnamed list of data frames, with the list item index corresponding to the column number of the `dta` data frame.

Example

Defined objects:

# Fix the RNG seed so the sampled example data are reproducible
set.seed(1)
# Number of rows in the example data set
size <- 20

# Data set
# Three coded columns (unitA, unitB, unitC) followed by one numeric column
dta <-
    data.frame(
        unitA = sample(LETTERS[1:4], size = size, replace = TRUE),
        unitB = sample(letters[16:20], size = size, replace = TRUE),
        unitC = sample(month.abb[1:4], size = size, replace = TRUE),
        someValue = sample(1:1e6, size = size, replace = TRUE)
    )

# Meta data
# Unnamed list of lookup tables; the i-th element describes the i-th column
# of dta. V1 holds the code as it appears in dta, V2 the label to substitute.
# The dictionaries are deliberately incomplete (e.g. "C" has no definition).
lstMeta <- list(
    # Unit A definitions
    data.frame(
        V1 = c("A", "B", "D"),
        V2 = c("Letter A", "Letter B", "Letter D")
    ),
    # Unit B definitions
    data.frame(
        V1 = c("t", "q"),
        V2 = c("small t", "small q")
    ),
    # Unit C definitions
    data.frame(
        V1 = c("Mar", "Jan"),
        V2 = c("March", "January")
    )
)

Desired Results

When applied to the `dta` data.frame, the function should return a result corresponding to the extract below:

unitA       unitB    unitC      someValue
Letter B    small t  Apr        912876
Letter B    small q  March      293604
C           s        Apr        459066
Letter D    p        March      332395
Letter A    small q  March      650871
Letter D    small q  Apr        258017
Letter D    p        January    478546
C           small q  Feb        766311
C           small t  March      84247
Letter A    small q  March      875322
Letter A    r        Feb        339073
Letter A    r        Apr        839441
C           r        Feb        346684
Letter B    p        January    333775
Letter D    small t  January    476352
(...)

Existing approach

# Attempted implementation: for each column position i, look up the column's
# values in the i-th dictionary (first column = code, second column = label)
# and try to overwrite the matching entries with their labels.
# NOTE: the loop runs over every column of dataSet, including positions for
# which lstDict has no element.
replaceLbls <- function(dataSet, lstDict) {
    sapply(seq_along(dataSet), function(i) {
        # Take corresponding metadata data frame
        dtaDict <- lstDict[[i]]

        # Replace values in selected column
        # Where matches on V1 push corresponding values from V2
        # NOTE: match() yields NA for values without a counterpart, so the
        # left-hand subscript can contain NA; the assignment also only
        # touches the anonymous function's local copy of dataSet.
        dataSet[,i][match(dataSet[,i], dtaDict[,1])] <- dtaDict[,2][match(dtaDict[,1], dataSet[,i])]  
    })
}

# Testing -----------------------------------------------------------------

replaceLbls(dataSet = dta, lstDict = lstMeta)

Of course, the approach proposed above does not work, as it will try to use `NA` in assignments; but it summarizes what I want to achieve:

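Error in x[...] <- m : NAs are not allowed in subscripted assignments. In addition: Warning message: In `[<-.factor`(`*tmp*`, match(dataSet[, i], dtaDict[, 1]), value = c(NA, : invalid factor level, NA generated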

:

,
,
someValue , ,

dplyr/data.table/sqldf .
for -loops

+4

replace r dataframe apply sapply

Konrad 26 . '17 7:18

4

, for . factors characters , .

, lstMeta, , . <<-. R, , , , .

# Fix the RNG seed so the sampled example data are reproducible
set.seed(1)
# Number of rows in the example data set
size <- 20

# Data set
# Coded columns are kept as character vectors (not factors) so that labels
# which are not existing factor levels can be written into them.
dta <-
  data.frame(
    unitA = sample(LETTERS[1:4], size = size, replace = TRUE),
    unitB = sample(letters[16:20], size = size, replace = TRUE),
    unitC = sample(month.abb[1:4], size = size, replace = TRUE),
    someValue = sample(1:1e6, size = size, replace = TRUE),
    stringsAsFactors = FALSE
  )

# Meta data
# Unnamed list of lookup tables; the i-th element describes the i-th column
# of dta. V1 holds the code as it appears in dta, V2 the replacement label.
lstMeta <- list(
  # Unit A definitions
  data.frame(
    V1 = c("A", "B", "D"),
    V2 = c("Letter A", "Letter B", "Letter D"),
    stringsAsFactors = FALSE
  ),
  # Unit B definitions
  data.frame(
    V1 = c("t", "q"),
    V2 = c("small t", "small q"),
    stringsAsFactors = FALSE
  ),
  # Unit C definitions
  data.frame(
    V1 = c("Mar", "Jan"),
    V2 = c("March", "January"),
    stringsAsFactors = FALSE
  )
)

# Replace coded values with their labels, column by column.
#
# dataSet: data frame; column i is recoded with the i-th element of lstDict.
#          Columns beyond length(lstDict) are returned untouched.
# lstDict: unnamed list of lookup data frames whose first column holds the
#          codes and whose second column holds the replacement labels.
#
# Returns a modified copy of dataSet; values without a dictionary entry are
# kept as they are. Factor columns that have a dictionary are returned as
# character vectors, since labels are generally not existing factor levels.
replaceLbls <- function(dataSet, lstDict) {
  stopifnot(length(lstDict) <= ncol(dataSet))

  # Iterate over the dictionaries actually supplied instead of a fixed 1:3
  for (i in seq_along(lstDict)) {
    # Take corresponding metadata data frame
    dtaDict <- lstDict[[i]]

    column <- dataSet[[i]]
    if (is.factor(column)) {
      column <- as.character(column)
    }

    # Where values match the codes, push the corresponding labels;
    # match() gives NA for values that have no definition
    idx <- match(column, as.character(dtaDict[[1]]))
    hit <- !is.na(idx)
    column[hit] <- as.character(dtaDict[[2]])[idx[hit]]

    # Plain local assignment: only the function's copy is modified
    dataSet[[i]] <- column
  }
  dataSet
}

# Testing -----------------------------------------------------------------

replaceLbls(dataSet = dta, lstDict = lstMeta)

+2

Michael Bird 26 . '17 8:17

You can also try the following:

# Map codes to labels using one lookup table.
#
# t:    value(s) to recode; a single value or a whole vector.
# meta: lookup data frame with codes in column V1 and labels in column V2.
#
# Returns the label from V2 where t matches V1, and the original value of t
# otherwise. Works element-wise, so it can be called on a full column.
mapr <- function(t, meta) {
  if (is.factor(t)) {
    t <- as.character(t)
  }
  ind <- match(t, as.character(meta$V1))
  # match() returns NA for values without a definition; keep those as-is
  hit <- !is.na(ind)
  t[hit] <- as.character(meta$V2)[ind[hit]]
  t
}

Then using sapply:

# Recode only the columns that have a dictionary, one dictionary per column.
# Assigning back into dta keeps the column names and leaves the remaining
# columns (e.g. the numeric someValue) with their original types, which a
# cbind() of everything into one matrix would not.
lblCols <- seq_along(lstMeta)
dta[lblCols] <- Map(
  function(col, meta) {
    vapply(
      as.character(col),
      function(v) as.character(mapr(v, meta)),
      character(1),
      USE.NAMES = FALSE
    )
  },
  dta[lblCols],
  lstMeta
)

+1

TUSHAr Jul 26 '17 at 8:36

source share

A couple of `mapply` calls can do the job:

# Replace coded values with labels from positional lookup tables.
#
# df:  data frame; column i is recoded with the i-th element of lst.
# lst: list of lookup data frames with codes in V1 and labels in V2.
#
# Returns df with the first length(lst) columns recoded as character
# vectors. Values without a definition are kept, and all other columns keep
# their original type and name (no coercion through a character matrix).
f1 <- function(df, lst) {
  cols <- seq_along(lst)
  stopifnot(length(cols) <= ncol(df))

  df[cols] <- Map(
    function(y, x) {
      y <- as.character(y)
      idx <- match(y, as.character(x$V1))
      # Only overwrite the entries that actually have a definition
      hit <- !is.na(idx)
      y[hit] <- as.character(x$V2)[idx[hit]]
      y
    },
    df[cols],
    lst
  )
  df
}

0

Sotos Jul 26 '17 at 8:56

source share

docendo discimus · Accepted Answer · 2017-07-26T08:29:43+0000

:

# Swap coded values for their labels in the leading columns of a data frame.
#
# dataSet: data frame; its first length(lstDict) columns are recoded.
# lstDict: list of lookup data frames with codes in V1 and labels in V2,
#          matched to the columns of dataSet by position.
#
# Returns dataSet with the recoded columns converted to character; values
# that have no entry in the dictionary are left unchanged.
replaceLbls <- function(dataSet, lstDict) {
  dictCols <- seq_along(lstDict)

  # Recode a single column against a single dictionary
  relabel <- function(values, dict) {
    values <- as.character(values)
    keys <- as.character(dict$V1)
    labels <- as.character(dict$V2)

    pos <- match(values, keys)
    found <- !is.na(pos)
    values[found] <- labels[pos[found]]
    values
  }

  dataSet[dictCols] <- mapply(
    relabel,
    dataSet[dictCols],
    lstDict,
    SIMPLIFY = FALSE
  )
  dataSet
}


head(replaceLbls(dta, lstMeta))
#      unitA   unitB unitC someValue
# 1 Letter B small t   Apr    912876
# 2 Letter B small q March    293604
# 3        C       s   Apr    459066
# 4 Letter D       p March    332395
# 5 Letter A small q March    650871
# 6 Letter D small q   Apr    258017

, X , , -. , , .

: , -, . , .. .

Iterate over columns in a data frame to replace values from the corresponding data in the data frame list

Example

Desired Results

Existing approach

More articles:

Iterate over columns in a data frame to replace values from the corresponding data in the data frame list