Multiple Response Analysis

# Example data set: 30 respondents with demographics plus one
# multiple-response question (q1) stored as three indicator columns.
# The short vectors are repeated explicitly to fill all 30 rows.
df1 <- data.frame(
  gender = rep(c("male", "female", "male"), length.out = 30),
  age = rep(as.character(1:6), length.out = 30),
  height = seq(141, 170)
)

# Turn the age codes "1".."6" into labelled age bands.
df1$age <- factor(
  df1$age,
  levels = 1:6,
  labels = c("16-24", "25-34", "35-44", "45-54", "55-64", "65+")
)

# q1 answer options: 0 means "not selected". The selected codes 1, 2 and 3
# are used to be compatible with existing datasets; they could all be
# changed to 1 if necessary.
df1$q1a <- rep(c(1, 0, 1, 0, 0, 1), length.out = nrow(df1))
df1$q1b <- rep(c(0, 0, 2, 2, 2, 0), length.out = nrow(df1))
df1$q1c <- rep(c(0, 0, 3, 3, 0, 3), length.out = nrow(df1))

I want to replicate SPSS's multiple response analysis (for questions with several answer options) in R.

I am currently using this code:

 #' Frequency table for a multiple-response question with three options.
 #'
 #' Side effect: stores the number of respondents who selected at least one
 #' option as a one-column data frame named `total_respondents`, assigned
 #' with `<<-` (the global environment when called from top level). This is
 #' kept because the calling code below reads it.
 #'
 #' @param a Responses for option a; the value 1 means "selected".
 #' @param b Responses for option b; the value 2 means "selected".
 #' @param c Responses for option c; the value 3 means "selected".
 #' @return A data frame with columns `question`, `freq`, `percent`
 #'   (share of all responses) and `percentofcases` (share of respondents),
 #'   with one row per option plus a final "Total" row.
 multfreqtable <- function(a, b, c) {
   # Number of respondents (denominator for percent of cases).
   # Missing answers are ignored rather than turning every count into NA.
   totrep <- sum(a == 1 | b == 2 | c == 3, na.rm = TRUE)

   # Frequency of each option, followed by the grand total.
   option_freq <- c(
     sum(a == 1, na.rm = TRUE),
     sum(b == 2, na.rm = TRUE),
     sum(c == 3, na.rm = TRUE)
   )
   totalsum <- sum(option_freq)
   table_question <- data.frame(
     question = c("a", "b", "c", "Total"),
     freq = c(option_freq, totalsum)
   )

   # Percent of responses and percent of cases.
   table_question$percent <- table_question$freq / totalsum * 100
   table_question$percentofcases <- table_question$freq / totrep * 100

   # Expose the respondent count to the caller (see side effect above).
   total_respondents <<- data.frame(totrep)

   table_question
 }

 # Calling the function - the columns must be tied to the data frame using $.
 q1_frequency <- multfreqtable(df1$q1a, df1$q1b, df1$q1c)

 # Keep the respondent count under a question-specific name. This is very
 # important with the current method, because the next call overwrites
 # `total_respondents`.
 total_respondents_q1 <- total_respondents
 rm(total_respondents)

Creating this table as a result of:

Output table

I am looking for a more efficient way to do this, one that ideally would not require editing the function if there were more or fewer multiple-choice questions.

+6
source share
2 answers

Your function is more complex than it needs to be for this task. I think the following function should work and be more flexible.

 #' Multiple-response frequency table for one question.
 #'
 #' @param data A data frame with one column per answer option. A value of
 #'   0 means "not selected"; any other non-missing value means "selected".
 #' @param question.prefix A single string (regular expression) that the
 #'   names of the question's columns start with, e.g. "q1".
 #' @return A data frame with columns `question`, `freq`, `percent`
 #'   (share of all responses) and `percentofcases` (share of respondents
 #'   who selected at least one option), one row per column plus "Total".
 multfreqtable <- function(data, question.prefix) {
   # Anchor the pattern so that "q1" cannot match a name such as "xq1".
   # Note that "q1" still matches "q10a"; pass e.g. "q1[a-z]" to exclude it.
   pattern <- if (startsWith(question.prefix, "^")) {
     question.prefix
   } else {
     paste0("^", question.prefix)
   }

   # Find the columns with the questions.
   cols <- grep(pattern, names(data))
   if (length(cols) == 0) {
     stop("no columns start with '", question.prefix, "'", call. = FALSE)
   }

   # Logical matrix of selections. drop = FALSE keeps the two-dimensional
   # shape when only one column matches, so colSums/rowSums still work.
   selected <- data[, cols, drop = FALSE] != 0

   # Totals for each option; missing answers count as "not selected".
   per_option <- colSums(selected, na.rm = TRUE)
   # Total number of responses.
   n_responses <- sum(per_option)
   # Number of respondents with at least one selected option.
   n_respondents <- sum(rowSums(selected, na.rm = TRUE) > 0)

   # Option totals followed by the grand total: the overall frequency.
   freq <- as.numeric(c(per_option, n_responses))

   data.frame(
     question = c(names(per_option), "Total"),
     freq = freq,
     percent = freq / n_responses * 100,
     percentofcases = freq / n_respondents * 100
   )
 }

Add another question to your example dataset:

 # Add a second multiple-response question (q2a, q2b, q2c) with random
 # answers. The seed is reset before each draw so every column is
 # reproducible on its own.
 # NOTE(review): the exact draws (and so the q2 counts) may differ between
 # R versions, whose default sampling algorithm has changed over time.
 set.seed(1)
 df1$q2a <- sample(c(0, 1), nrow(df1), replace = TRUE)
 set.seed(2)
 df1$q2b <- sample(c(0, 2), nrow(df1), replace = TRUE)
 set.seed(3)
 df1$q2c <- sample(c(0, 3), nrow(df1), replace = TRUE)

Create a response table "q1":

 > multfreqtable(df1, "q1")
   question freq   percent percentofcases
 1      q1a   15  33.33333             60
 2      q1b   15  33.33333             60
 3      q1c   15  33.33333             60
 4    Total   45 100.00000            180

Create a response table "q2":

 > multfreqtable(df1, "q2")
   question freq   percent percentofcases
 1      q2a   14  31.11111       53.84615
 2      q2b   13  28.88889       50.00000
 3      q2c   18  40.00000       69.23077
 4    Total   45 100.00000      173.07692

Tables for several questions

Here is a modified version of the function that allows you to immediately create a list of tables for several questions:

 #' Multiple-response frequency tables for one or more questions.
 #'
 #' @param data A data frame with one column per answer option. A value of
 #'   0 means "not selected"; any other non-missing value means "selected".
 #' @param question.prefix A character vector of prefixes (regular
 #'   expressions), one per question, e.g. c("q1", "q2").
 #' @return A list of data frames named after the prefixes. Each has the
 #'   columns `question` (column name with the prefix removed, plus
 #'   "Total"), `freq`, `percent` (share of all responses) and
 #'   `percentofcases` (share of respondents with at least one selection).
 #'   An empty `question.prefix` yields an empty list.
 multfreqtable <- function(data, question.prefix) {
   # Builds the frequency table for a single question prefix.
   one_table <- function(prefix) {
     # Anchor the pattern so that "q1" cannot match a name such as "xq1".
     pattern <- if (startsWith(prefix, "^")) prefix else paste0("^", prefix)
     cols <- grep(pattern, names(data))
     if (length(cols) == 0) {
       stop("no columns start with '", prefix, "'", call. = FALSE)
     }

     # drop = FALSE keeps a matrix even when only one column matches.
     selected <- data[, cols, drop = FALSE] != 0
     # Missing answers count as "not selected".
     per_option <- colSums(selected, na.rm = TRUE)
     n_responses <- sum(per_option)
     n_respondents <- sum(rowSums(selected, na.rm = TRUE) > 0)
     freq <- as.numeric(c(per_option, n_responses))

     data.frame(
       question = c(sub(pattern, "", names(per_option)), "Total"),
       freq = freq,
       percent = freq / n_responses * 100,
       percentofcases = freq / n_respondents * 100
     )
   }

   # lapply handles zero prefixes safely, unlike a 1:length() loop.
   tables <- lapply(question.prefix, one_table)
   names(tables) <- question.prefix
   tables
 }

Examples:

 > multfreqtable(df1, "q1")
 $q1
   question freq   percent percentofcases
 1        a   15  33.33333             60
 2        b   15  33.33333             60
 3        c   15  33.33333             60
 4    Total   45 100.00000            180

 > test1 = multfreqtable(df1, c("q1", "q2"))
 > test1
 $q1
   question freq   percent percentofcases
 1        a   15  33.33333             60
 2        b   15  33.33333             60
 3        c   15  33.33333             60
 4    Total   45 100.00000            180

 $q2
   question freq   percent percentofcases
 1        a   14  31.11111       53.84615
 2        b   13  28.88889       50.00000
 3        c   18  40.00000       69.23077
 4    Total   45 100.00000      173.07692

 > test1$q1
   question freq   percent percentofcases
 1        a   15  33.33333             60
 2        b   15  33.33333             60
 3        c   15  33.33333             60
 4    Total   45 100.00000            180
+8
source

I realize that this question is quite old, but I could not find a more up-to-date solution. Here is my version, based on the dplyr / tidyverse approach.

 #' Multiple-response frequency table (dplyr / tidyr version).
 #'
 #' Only the columns named in `mv_q` are used, so the data frame may hold
 #' any other columns (demographics, other questions) without affecting
 #' the result. Requires dplyr and tidyr to be attached.
 #'
 #' @param df1 A data frame with one column per answer option. A value of
 #'   0 means "not selected"; any other non-missing value means "selected".
 #' @param mv_q Character vector with the names of the option columns.
 #' @return A tibble with columns `question`, `freq`, `percent` (share of
 #'   all responses) and `percent_of_cases` (share of respondents with at
 #'   least one selection), one row per option (sorted by name) plus a
 #'   final "Total" row.
 mult_resp <- function(df1, mv_q = c("q1a", "q1b", "q1c")) {
   # Recode the chosen columns to 1 = selected, 0 = not selected.
   # Missing answers are treated as "not selected".
   indicators <- df1 %>%
     select(all_of(mv_q)) %>%
     mutate(across(everything(), function(x) as.numeric(!is.na(x) & x != 0)))

   # Number of cases, excluding respondents whose answers are all zero.
   n_cases <- sum(rowSums(indicators) > 0)

   # One row per option with its frequency and percentages.
   res <- indicators %>%
     pivot_longer(everything(), names_to = "question", values_to = "resp") %>%
     group_by(question) %>%
     summarise(freq = sum(resp)) %>%
     mutate(
       percent = freq / sum(freq) * 100,
       percent_of_cases = freq / n_cases * 100
     )

   # Append the "Total" row.
   total <- tibble(
     question = "Total",
     freq = sum(res$freq, na.rm = TRUE),
     percent = sum(res$percent, na.rm = TRUE),
     percent_of_cases = sum(res$percent_of_cases, na.rm = TRUE)
   )
   bind_rows(res, total)
 }

Example:

 > mult_resp(df1, mv_q = c("q1a", "q1b", "q1c"))
 # A tibble: 4 x 4
   question  freq percent percent_of_cases
   <chr>    <dbl>   <dbl>            <dbl>
 1 q1a         15    33.3               60
 2 q1b         15    33.3               60
 3 q1c         15    33.3               60
 4 Total       45   100.               180
0
source

Source: https://habr.com/ru/post/1396246/


All Articles