Transpose after splitting only the variable name

Question

Transpose after splitting only the variable name

I'm new to R, but I'm keen to master it! I'm working on a project for work and I've hit a complete dead end. Any help is much appreciated!

I need to convert this data frame ...

   Brand       UK__Sales__YA   UK__Sales__MAT  CN__Sales__YA  CN__Sales__MAT
1  Snickers    100             110            90             95
2  Twix        50              60             30             35
3  Skittles    75              80             105            130

... to that

   Brand       Country     Year      Sales
1  Snickers    UK          YA        100
2  Snickers    UK          MAT       110
3  Snickers    CN          YA        90
4  Snickers    CN          MAT       95
5  Twix        UK          YA        50
6  Twix        UK          MAT       60
7  Twix        CN          YA        30
8  Twix        CN          MAT       35
9  Skittles    UK          YA        75
10 Skittles    UK          MAT       80
11 Skittles    CN          YA        105
12 Skittles    CN          MAT       130

As you can tell, I need to split off the first part and the last part of my Sales variable names and turn them into separate columns. I have additional countries and additional indicators in my data set, but I think that if you help me solve this problem, I can finish the rest myself. Thank you !! :-)

+4

r transpose

Derek lilley Dec 23 '17 at 14:59

source share

4 answers

Using tidyr and dplyr from the tidyverse:

library(tidyr)
library(dplyr)

# Stack every non-Brand column into (column, Sales) pairs, break the old
# column names apart on "__", and keep only the pieces that are needed.
df %>%
  gather(key = column, value = Sales, -Brand) %>%
  separate(
    col = column,
    into = c("Country", "delete", "Year"),
    sep = "__"
  ) %>%
  select(Brand, Country, Year, Sales) %>%
  arrange(Brand)

#       Brand Country Year Sales
# 1  Skittles      UK   YA    75
# 2  Skittles      UK  MAT    80
# 3  Skittles      CN   YA   105
# 4  Skittles      CN  MAT   130
# 5  Snickers      UK   YA   100
# 6  Snickers      UK  MAT   110
# 7  Snickers      CN   YA    90
# 8  Snickers      CN  MAT    95
# 9      Twix      UK   YA    50
# 10     Twix      UK  MAT    60
# 11     Twix      CN   YA    30
# 12     Twix      CN  MAT    35

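To understand how this works, run the pipeline one step at a time, stopping before each %>% (for example, run df %>% gather(key, Sales, -Brand) on its own and inspect the result, then add the next step, and so on). The key step is separate.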

+2

JasonAizkalns Dec 23 '17 at 15:17

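Here is another option with the tidyverse. We gather the data into "long" format, then use extract to pull "Country" and "Year" out of the "Var" column.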

library(tidyr)
library(dplyr)
# Stack every non-Brand column into Var/Sales pairs, then capture the first
# and last "__"-delimited pieces of Var as Country and Year.
df1 %>%
  gather(key = Var, value = Sales, -Brand) %>%
  extract(
    col = Var,
    into = c("Country", "Year"),
    regex = "(\\w+)__\\w+__(\\w+)"
  )
#      Brand Country Year Sales
#1  Snickers      UK   YA   100
#2      Twix      UK   YA    50
#3  Skittles      UK   YA    75
#4  Snickers      UK  MAT   110
#5      Twix      UK  MAT    60
#6  Skittles      UK  MAT    80
#7  Snickers      CN   YA    90
#8      Twix      CN   YA    30
#9  Skittles      CN   YA   105
#10 Snickers      CN  MAT    95
#11     Twix      CN  MAT    35
#12 Skittles      CN  MAT   130

data.table

library(data.table)
# Melt to long form with the value column named "Sales", then split the old
# column names on the literal "__" and keep the first (Country) and third
# (Year) pieces; the middle piece ("Sales") is discarded via [-2].
# The argument is `value.name` (singular): a misspelled `value.names` is
# absorbed by `...` and the column would silently stay named "value".
# NOTE: setDT() converts df1 to a data.table by reference, so df1 itself
# is modified.
melt(setDT(df1), id.vars = "Brand", value.name = "Sales")[,
  c("Country", "Year") := tstrsplit(variable, "__", fixed = TRUE)[-2]][,
  variable := NULL][]

0

akrun Dec 23 '17 at 15:19

source share

And here is the solution using the package reshape2.

# Stack all non-Brand columns into variable/value pairs.
new <- reshape2::melt(data, id.vars = "Brand")

# Country is the text before the first underscore of the old column name;
# Year is the trailing run of letters after the last underscore.
new[["Country"]] <- sub("(^[^_]*)_.*$", "\\1", new[["variable"]])
new[["Year"]] <- sub("^.*_([[:alpha:]]*$)", "\\1", new[["variable"]])

# Keep Brand, Country, Year and the melted values, renaming the latter.
new <- new[, c("Brand", "Country", "Year", "value")]
names(new)[names(new) == "value"] <- "Sales"

head(new)
#     Brand Country Year Sales
#1 Snickers      UK   YA   100
#2     Twix      UK   YA    50
#3 Skittles      UK   YA    75
#4 Snickers      UK  MAT   110
#5     Twix      UK  MAT    60
#6 Skittles      UK  MAT    80

DATA

# Example input: one row per Brand, one integer column per
# Country__Sales__Year combination, with character row names "1".."3".
data <- data.frame(
  Brand = c("Snickers", "Twix", "Skittles"),
  UK__Sales__YA = c(100L, 50L, 75L),
  UK__Sales__MAT = c(110L, 60L, 80L),
  CN__Sales__YA = c(90L, 30L, 105L),
  CN__Sales__MAT = c(95L, 35L, 130L),
  row.names = c("1", "2", "3"),
  stringsAsFactors = FALSE
)

0

Rui barradas Dec 23 '17 at 15:22

source share

G. Grothendieck · Accepted Answer · 2017-12-23T15:19:27+0000

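1) dplyr/tidyr: Gather the data into long form, separate the old column names into Country, Variable and Year, spread Variable back out into columns, and sort the rows. If Variable held other values besides Sales (for example Price), each would get its own column of Values; here there is only Sales.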

library(dplyr)
library(tidyr)

# Go long first, split the old column names into Country / Variable / Year,
# then give every distinct Variable its own column again.
DF %>%
  gather(key = name, value = Value, -Brand) %>%
  separate(
    col = name,
    into = c("Country", "Variable", "Year"),
    sep = "__"
  ) %>%
  spread(key = Variable, value = Value) %>%
  arrange(Brand, desc(Country), desc(Year))

giving:

      Brand Country Year Sales
1  Skittles      UK   YA    75
2  Skittles      UK  MAT    80
3  Skittles      CN   YA   105
4  Skittles      CN  MAT   130
5  Snickers      UK   YA   100
6  Snickers      UK  MAT   110
7  Snickers      CN   YA    90
8  Snickers      CN  MAT    95
9      Twix      UK   YA    50
10     Twix      UK  MAT    60
11     Twix      CN   YA    30
12     Twix      CN  MAT    35

To see this with more than one variable, try it on DF2, which is defined at the end.

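1a) If there is only a single variable, as in DF but not DF2, this shorter version is enough. The arrange step can be omitted if the row order does not matter.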

# With Sales as the only variable, the "__Sales__" infix can serve directly
# as the separator between Country and Year.
DF %>%
  gather(key = name, value = Sales, -Brand) %>%
  separate(
    col = name,
    into = c("Country", "Year"),
    sep = "__Sales__"
  ) %>%
  arrange(Brand, desc(Country), desc(Year))

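2) Base R: this alternative uses only reshape and other base functions. If the row names and row order do not matter, the rownames(out) <- NULL line and the ordering lines can be omitted. It also works with DF2.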

# Group the measure columns by the middle part of their names, e.g.
# list(Sales = c("UK__Sales__YA", "UK__Sales__MAT", ...)).
varying <- split(names(DF)[-1], sub(".*__(.*)__.*", "\\1", names(DF)[-1]))

# Stack the columns. `times` labels each stacked slice with its
# "Country__Year" key; without it reshape() fills `time` with 1, 2, ...
# and the Country/Year extraction below would return those numbers.
long <- reshape(
  DF,
  direction = "long",
  idvar = "Brand",
  varying = varying,
  v.names = names(varying),
  times = sub("__.*__", "__", varying[[1]])
)

# Split the "Country__Year" key into its two parts and drop the key.
out <- transform(
  long,
  Country = sub("__.*", "", time),
  Year = sub(".*__", "", time),
  time = NULL
)
rownames(out) <- NULL

# Sort by Brand ascending, then Country and Year descending; xtfrm() gives
# a numeric sort key so the descending order can be written as a negation.
o <- with(out, order(Brand, -xtfrm(Country), -xtfrm(Year)))
out <- out[o, ]
out

giving:

      Brand Sales Country Year
3  Skittles    75      UK   YA
6  Skittles    80      UK  MAT
9  Skittles   105      CN   YA
12 Skittles   130      CN  MAT
1  Snickers   100      UK   YA
4  Snickers   110      UK  MAT
7  Snickers    90      CN   YA
10 Snickers    95      CN  MAT
2      Twix    50      UK   YA
5      Twix    60      UK  MAT
8      Twix    30      CN   YA
11     Twix    35      CN  MAT

# Example input as text: the header row has one field fewer than the data
# rows, so the leading 1/2/3 column is read as row names.
Lines <- "   Brand       UK__Sales__YA   UK__Sales__MAT  CN__Sales__YA  CN__Sales__MAT
1  Snickers    100             110            90             95
2  Twix        50              60             30             35
3  Skittles    75              80             105            130"

DF <- read.table(text = Lines, header = TRUE)

# DF2 is DF plus a matching set of Price columns (ten times the Sales
# values), for trying the solutions with more than one variable.
DF2 <- local({
  prices <- 10 * DF[2:5]
  names(prices) <- sub("Sales", "Price", names(prices))
  cbind(DF, prices)
})

Transpose after splitting only the variable name

More articles: