Random Weighted Average Increase

I wrote code to randomly add points to a numeric variable to increase the weighted average score by 10%, while storing new grades in variable S1.

This is done by calculating the total number of points that need to be added in order to increase the average value by 10%. The next step is to randomly select until the weighted sum of answers is equal to the goal, but does not add points where the score is already 10 so as not to transmit the maximum value on the scale. The final step is to choose whether the nearest amount will be closest or just below the target, and select this pattern to add points.

The code works fine, but does not look efficient. I am new to R and have read that loops should be avoided as much as possible but cannot develop an alternative. Is it possible to do what I'm trying, but more efficiently?

#Create random data    
library(stats)
    set.seed(21821)
    ncust <- 1000
    cust.df <- data.frame(cust.id=as.factor(c(1:ncust)))
    wtvar <- rnorm(ncust, mean=1, sd=0.2)
    V1 <- floor(rnorm(ncust, mean=7.5, sd=3))
    V1[V1 > 10] <- 10
    V1[V1 < 1] <- 1
    cust.df$V1 <- V1
    cust.df$wtvar <- wtvar

#Function to determine sample required   
    random.sample <- function(x) {
    (pctadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)/sum(cust.df$V1[!cust.df$V1==10]*cust.df$wtvar[!cust.df$V1==10])) #percentage of resps (weighted) who need points added to make 10% increase
    (numadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)) #sum of weights needed to make 10% increase

    wgttot <- vector(mode="numeric", length=0)
    idtot <- vector(mode="numeric", length=0)
    id.ref <- cust.df$cust.id[!cust.df$V1==10]

      repeat {
        preidtot <- idtot
        prewgttot <- wgttot
        (t.id <- as.numeric(sample(id.ref, 1)))
        (t.wgt <- cust.df$wtvar[cust.df$cust.id==t.id])
        id.ref <- id.ref[!id.ref==t.id]
        wgttot <- c(wgttot,t.wgt)
        idtot <- c(idtot, t.id)
        if (sum(wgttot) > numadd) break
      }
      prediff <- numadd - sum(prewgttot)
      postdiff <- sum(wgttot) - numadd
      if (prediff < postdiff) {
        x <- preidtot
      } else {
        x <- idtot
        }
      return(x)
    }

tempids <- random.sample()

#Apply sample rule
cust.df$S1 = ifelse(cust.df$cust.id %in% tempids, cust.df$V1 + 1, cust.df$V1)

#Check ~10% increase achieved
weighted.mean(cust.df$V1,cust.df$wtvar)
weighted.mean(cust.df$S1,cust.df$wtvar)
+4
source share
1 answer

random.sample- this is your first version, random.sample1- this is a version without a loop, it random.sample1performs the same random.sample function, but their results are different. You can check the code to see how the result is used random.sample1. And due to the fact that from your definition the required samples are not unique, therefore, the results of the weighted sum are also different, but they all increase approximately by 10%.

#Create random data    
library(stats)
set.seed(21821)
ncust <- 1000
cust.df <- data.frame(cust.id=as.factor(c(1:ncust)))
wtvar <- rnorm(ncust, mean=1, sd=0.2)
V1 <- floor(rnorm(ncust, mean=7.5, sd=3))
V1[V1 > 10] <- 10
V1[V1 < 1] <- 1
cust.df$V1 <- V1
cust.df$wtvar <- wtvar

#Function to determine sample required   
random.sample <- function() {
    (pctadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)/sum(cust.df$V1[!cust.df$V1==10]*cust.df$wtvar[!cust.df$V1==10])) #percentage of resps (weighted) who need points added to make 10% increase
    (numadd <- (sum(cust.df$V1*cust.df$wtvar)*0.1)) #sum of weights needed to make 10% increase

    wgttot <- vector(mode="numeric", length=0)
    idtot <- vector(mode="numeric", length=0)
    id.ref <- cust.df$cust.id[!cust.df$V1==10]

    repeat {
        preidtot <- idtot
        prewgttot <- wgttot
        (t.id <- as.numeric(sample(id.ref, 1)))
        (t.wgt <- cust.df$wtvar[cust.df$cust.id==t.id])
        id.ref <- id.ref[!id.ref==t.id]
        wgttot <- c(wgttot,t.wgt)
        idtot <- c(idtot, t.id)
        if (sum(wgttot) > numadd) break
    }
    prediff <- numadd - sum(prewgttot)
    postdiff <- sum(wgttot) - numadd
    if (prediff < postdiff) {
        x <- preidtot
    } else {
        x <- idtot
    }
    return(x)
}

random.sample1 <- function() {
    numadd <- sum(cust.df$V1 * cust.df$wtvar) * 0.1 #sum of weights needed to make 10% increase
    id.ref <- which(cust.df$V1 != 10)
    pos <- sample(id.ref, length(id.ref))
    t.wgt <- cust.df$wtvar[pos]
    sumwgttot <- cumsum(t.wgt)
    return(pos[1:which.min(abs(sumwgttot - numadd))])
}

system.time(tempids <- random.sample())
## On my computer, it uses about 0.200s to finish the calculation.
system.time(tempids1 <- random.sample1())
## On my computer, the without loop version uses about 0.000s.

#Apply sample rule
cust.df$S1 = ifelse(cust.df$cust.id %in% tempids, cust.df$V1 + 1, cust.df$V1)
## Note that the usage of tempids1 is different, this usage is more 
## effective than the original one.
cust.df$S2 = cust.df$V1
cust.df$S2[tempids1] = cust.df$V1[tempids1] + 1

#Check ~10% increase achieved
weighted.mean(cust.df$V1,cust.df$wtvar)
weighted.mean(cust.df$S1,cust.df$wtvar)
weighted.mean(cust.df$S2,cust.df$wtvar)
+7
source

Source: https://habr.com/ru/post/1675904/


All Articles