Transpose a dataset

I want to convert datasets similar to my.data below, and then sum the rows.

 my.data <- "landuse units year county.a county.b county.c county.d apple acres 2010 0 2 4 6 pear acres 2010 10 20 30 40 peach acres 2010 500 400 300 200" my.data2 <- read.table(textConnection(my.data), header = T) my.data2 

Required Result:

  counties all.fruit county.a 510 county.b 422 county.c 334 county.d 246 

I can do this using the code below. However, the following code seems to be a huge overkill. I hope there is a much simpler solution.

 # transpose the data set tmy.data2 <- t(my.data2) tmy.data2 <- as.data.frame(tmy.data2) # assign row names to the data set my.rows <- row.names(tmy.data2) transposed.data <- cbind(my.rows, tmy.data2) transposed.data # extract numbers to obtain row sums fruit.data <- as.data.frame(transposed.data[4:dim(transposed.data)[1], 2:dim(transposed.data)[2]]) fruit.data2 <- as.matrix(fruit.data) fruit.data3 <- matrix(as.numeric(fruit.data2), nrow=( dim(fruit.data2)[1] ), byrow=F) # sum fruit by county all.fruit <- rowSums(fruit.data3, na.rm=T) # create row names for summed fruit data counties <- my.rows[4:length(my.rows)] almost.final.data <- cbind(counties, all.fruit) really.final.data <- as.data.frame(almost.final.data) really.final.data[,2] <- as.numeric(as.character(really.final.data[,2])) really.final.data str(really.final.data) 

Thanks for any suggestions. I can use the code above, but I see this request as an opportunity to significantly improve my programming.

+4
source share
2 answers

I would just multiply the "county" columns, summarize them and create data.frame using the results:

 out <- colSums(my.data2[,grepl("county",colnames(my.data2))]) out2 <- data.frame(counties=names(out), all.fruit=out, row.names=NULL, stringsAsFactors=FALSE) 
+7
source

Why can't you just add columns instead?

 colSums(my.data2[, 4:7]) 

or

 library(plyr) numcolwise(sum)(my.data2) year county.a county.b county.c county.d 1 6030 510 422 334 246 > 

However, if you want to reorganize, there are many options. The reshape2 package provides a nice syntax:

 library(reshape2) > my.data.melt <- melt(my.data2, id.vars=c('units', 'year', 'landuse')) > my.data.melt units year landuse variable value 1 acres 2010 apple county.a 0 2 acres 2010 pear county.a 10 3 acres 2010 peach county.a 500 4 acres 2010 apple county.b 2 5 acres 2010 pear county.b 20 6 acres 2010 peach county.b 400 7 acres 2010 apple county.c 4 8 acres 2010 pear county.c 30 9 acres 2010 peach county.c 300 10 acres 2010 apple county.d 6 11 acres 2010 pear county.d 40 12 acres 2010 peach county.d 200 

Then I would use plyr :

 > library(plyr) > ddply(my.data.melt, .(variable), summarise, all.fruit=sum(value)) variable all.fruit 1 county.a 510 2 county.b 422 3 county.c 334 4 county.d 246 > 

You can also do this using the R aggregate base or the data.table package.

data.table

 > library(data.table) > my.data.melt <- as.data.table(melt(my.data2, id.vars=c('units', 'year', 'landuse'))) > my.data.melt[,list(all.fruit = sum(value)), by = variable] variable all.fruit 1: county.a 510 2: county.b 422 3: county.c 334 4: county.d 246 

or if you want it to stay in wide format

 > DT <- as.data.table(my.data2) > DT[, lapply(.SD, sum, na.rm=TRUE), .SDcols = grep("county",names(DT))]) county.a county.b county.c county.d 1: 510 422 334 246 # NB: This needs v1.8.3. Before that, an as.data.table() call was required as # the lapply(.SD,...) used to return a named list in this no grouping case. 

aggregate

 > aggregate(value~variable, my.data.melt, sum) variable value 1 county.a 510 2 county.b 422 3 county.c 334 4 county.d 246 
+9
source

Source: https://habr.com/ru/post/1432584/


All Articles