Compute p-values in a for loop

I have a dataset containing experiment data. Every day I have a new observation.

A fictional example of my `df`, with columns `day` (the day), `group_a` (data management) and `group_b` (data processing):

# Example data: one row per observation, holding the day it was recorded on
# and the integer measurement for each of the two groups. Bound to `df`
# because every snippet below reads the data under that name.
df <- data.frame(
  day = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
  group_a = c(4L, 2L, 3L, 1L, 1L, 4L, 3L, 2L, 4L),
  group_b = c(3L, 4L, 2L, 2L, 2L, 2L, 3L, 4L, 5L)
)

I want to apply the Wilcoxon signed-rank test to this dataset, for example:

# One-sided Wilcoxon test over the whole dataset: the alternative hypothesis
# is that group_a is shifted towards larger values than group_b.
# `alternative` is written out in full instead of relying on partial
# matching of "g".
# NOTE(review): `paired` is left at its default, so this is the two-sample
# (rank-sum) form of the test -- confirm that is what is intended.
test <- wilcox.test(df$group_a, df$group_b, alternative = "greater")
test$p.value

In this example, I apply the test across the entire dataset.

I want to apply it on the 1st day, then the 1st and 2nd, etc., finally getting a list similar to (fictitious data):

day p-value
1   0.02
2   0.03
3   0.3

How can I apply the test in a for loop over `day`, each time using the cumulative data from all days up to and including that day?

+4
source share
3 answers

Using:

# For every distinct day, run the one-sided Wilcoxon test on all rows recorded
# up to and including that day, and store the resulting p-value in `p.val`
# on the rows belonging to that day.
for (i in unique(df$day)) {
  # `df$day <= i` selects the cumulative window directly; unlike `1:i` it does
  # not assume that days are positive consecutive integers (1:0 is c(1, 0)).
  df$p.val[df$day == i] <- with(
    df[df$day <= i, ],
    wilcox.test(group_a, group_b, alternative = "greater")$p.value
  )
}

You get:

> df
  day group_a group_b     p.val
1   1       4       3 0.7928919
2   1       2       4 0.7928919
3   2       3       2 0.7768954
4   2       1       2 0.7768954
5   2       1       2 0.7768954
6   3       4       2 0.7084401
7   3       3       3 0.7084401
8   3       2       4 0.7084401
9   3       4       5 0.7084401

If you only want one p-value per day:

# One cumulative p-value per distinct day: each call tests all rows recorded
# up to and including day `i`. vapply() guarantees a numeric vector with
# exactly one value per day, whatever the input looks like.
vec <- vapply(
  unique(df$day),
  function(i) {
    # Cumulative window; `<=` avoids assuming days are the integers 1, 2, ...
    with(
      df[df$day <= i, ],
      wilcox.test(group_a, group_b, alternative = "greater")$p.value
    )
  },
  numeric(1)
)

# Pair each day with its cumulative p-value.
df2 <- data.frame(day = unique(df$day), p.val = vec)

which gives:

> df2
  day     p.val
1   1 0.7928919
2   2 0.7768954
3   3 0.7084401
+3

You can also use `Reduce` with `accumulate = TRUE`,

# Build the cumulative data frames (day 1, days 1-2, days 1-3, ...), test each
# one, and stack the p-values into a one-column matrix. The intermediate
# objects live inside local() so only `p_value` is created.
p_value <- local({
  # split() gives one data frame per day; Reduce(rbind, ..., accumulate = TRUE)
  # keeps every partial result of row-binding them in order.
  cumulative_frames <- Reduce(rbind, split(df, df$day), accumulate = TRUE)
  p_list <- lapply(cumulative_frames, function(chunk) {
    wilcox.test(chunk$group_a, chunk$group_b, alternative = 'g')$p.value
  })
  do.call(rbind, p_list)
})
p_value
#          [,1]
#[1,] 0.7928919
#[2,] 0.7768954
#[3,] 0.7084401

and then, to combine the p-values with their days,

# Put the distinct days next to the accumulated p-values; the one-column
# matrix `p_value` becomes a column of the same name.
final_df <- data.frame(
  day = unique(df$day),
  p_value
)

final_df
#  day   p_value
#1   1 0.7928919
#2   2 0.7768954
#3   3 0.7084401
+1

This also works:

library(data.table)
# Convert `df` to a data.table by reference so the `df[i, j]` syntax below works.
setDT(df)
# One cumulative p-value per distinct day: for each day `x`, test all rows
# with `day <= x`. vapply() enforces a single numeric result per day, and the
# result element is named in full (`p.value`) rather than relying on `$`
# partial matching.
test_pvals <- vapply(unique(df[, day]), function(x) {
  df[day <= x, wilcox.test(group_a, group_b, alternative = "greater")$p.value]
}, numeric(1))
data.table(day = df[, unique(day)], p.val = test_pvals)
##    day     p.val
## 1:   1 0.7928919
## 2:   2 0.7768954
## 3:   3 0.7084401
0
source

Source: https://habr.com/ru/post/1676586/


All Articles