Visualization of summary statistics per day, per group

Assume the following data frame:

head(df, 9)
         Day               variable     value
1 2015-10-18   Number_Flows.minimum  401.0000
2 2015-10-18   Number_Flows.maximum 2068.0000
3 2015-10-18   Number_Flows.average 1578.9474
4 2015-10-18 Number_srcaddr.minimum   95.0000
5 2015-10-18 Number_srcaddr.maximum  292.0000
6 2015-10-18 Number_srcaddr.average  222.6316
7 2015-10-18 Number_dstaddr.minimum   65.0000
8 2015-10-18 Number_dstaddr.maximum  411.0000
9 2015-10-18 Number_dstaddr.average  202.5789

What I want to do - this graph minimum, maximum, averagefor each Number_Flows, Number_srcaddretc. I would prefer to have strings displaying the value, but I am open to other methods, such as well, while I get (for example, the reproduced example below) a total of 22 charts (11 for each day).

I tried different things but no luck.

library(dplyr)
library(ggplot2)


ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_bar(stat = 'identity')+facet_wrap(~group)
ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_bar(stat = 'identity')+facet_wrap(~group)
ggplot(df %>% mutate(group = paste(Day, gsub('\\..*', '', variable), sep = '-')), aes(x = Day, y = value))+geom_line()+facet_wrap(~group)

DATA

dput(df)
structure(list(Day = structure(c(1445115600, 1445115600, 1445115600, 
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 
1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 1445115600, 
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 
1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 1445202000, 
1445202000, 1445202000, 1445202000), class = c("POSIXct", "POSIXt"
), tzone = ""), variable = c("Number_Flows.minimum", "Number_Flows.maximum", 
"Number_Flows.average", "Number_srcaddr.minimum", "Number_srcaddr.maximum", 
"Number_srcaddr.average", "Number_dstaddr.minimum", "Number_dstaddr.maximum", 
"Number_dstaddr.average", "Sum_packets.minimum", "Sum_packets.maximum", 
"Sum_packets.average", "Sum_duration_nannosecs.minimum", "Sum_duration_nannosecs.maximum", 
"Sum_duration_nannosecs.average", "Average_Duration.minimum", 
"Average_Duration.maximum", "Average_Duration.average", "Average_Bytes.minimum", 
"Average_Bytes.maximum", "Average_Bytes.average", "Bytes_per_packet.minimum", 
"Bytes_per_packet.maximum", "Bytes_per_packet.average", "Sum_of_Bytes.minimum", 
"Sum_of_Bytes.maximum", "Sum_of_Bytes.average", "Actual_Batch_Duration_secs.minimum", 
"Actual_Batch_Duration_secs.maximum", "Actual_Batch_Duration_secs.average", 
"packets_per_second.minimum", "packets_per_second.maximum", "packets_per_second.average", 
"Number_Flows.minimum", "Number_Flows.maximum", "Number_Flows.average", 
"Number_srcaddr.minimum", "Number_srcaddr.maximum", "Number_srcaddr.average", 
"Number_dstaddr.minimum", "Number_dstaddr.maximum", "Number_dstaddr.average", 
"Sum_packets.minimum", "Sum_packets.maximum", "Sum_packets.average", 
"Sum_duration_nannosecs.minimum", "Sum_duration_nannosecs.maximum", 
"Sum_duration_nannosecs.average", "Average_Duration.minimum", 
"Average_Duration.maximum", "Average_Duration.average", "Average_Bytes.minimum", 
"Average_Bytes.maximum", "Average_Bytes.average", "Bytes_per_packet.minimum", 
"Bytes_per_packet.maximum", "Bytes_per_packet.average", "Sum_of_Bytes.minimum", 
"Sum_of_Bytes.maximum", "Sum_of_Bytes.average", "Actual_Batch_Duration_secs.minimum", 
"Actual_Batch_Duration_secs.maximum", "Actual_Batch_Duration_secs.average", 
"packets_per_second.minimum", "packets_per_second.maximum", "packets_per_second.average"
), value = c(401, 2068, 1578.94736842105, 95, 292, 222.631578947368, 
65, 411, 202.578947368421, 4181, 130567, 33860.2631578947, 2647278, 
10876533, 5438303.63157895, 1543.937984, 20335.58603, 4202.062837, 
692.4193548, 77207.90476, 14689.4305788105, 231.6654261, 943.7592654, 
465.315475931579, 1244970, 123223816, 19865244, 9, 30, 27.1578947368421, 
179, 4352, 1265.94736842105, 609, 2352, 1578.94736842105, 89, 
299, 219.105263157895, 92, 402, 193.578947368421, 1124, 60473, 
19022.6842105263, 944317, 20088618, 5254959.84210526, 1550.602627, 
9749.356239, 3236.99523905263, 258.9441708, 17451.96293, 5789.86937011053, 
140.2998221, 717.4807734, 424.926870810526, 157697, 33505216, 
9510806.21052632, 5, 30, 24.9473684210526, 114, 2179, 772.947368421053
)), .Names = c("Day", "variable", "value"), row.names = c(NA, 
66L), class = "data.frame")
+4
source share
4 answers

I like to use lines for trends over time, and ribbons for ranges of values.

Like @docendo, I would first separate, but then I would spreadafter:

library(tidyverse)

df %>%
  separate(variable, c("type", "var"), sep = "\\.") %>% 
  spread(var, value) %>% 
  ggplot(aes(Day)) +
  geom_line(aes(y = average), size = 1) +
  geom_ribbon(aes(ymin = minimum, ymax = maximum), alpha = 0.2) +
  facet_wrap(~type, scales = 'free_y') +
  theme(axis.text.x=element_text(angle = 90, vjust = 0.5))

, .

+7

"variable" :

library(dplyr)
library(ggplot2)
library(tidyr)

df %>% 
  separate(variable, c("type", "var"), sep = "\\.") %>% 
  ggplot(aes(x = Day, y = value, color = var)) +
  geom_point() +
  facet_wrap(~type) + 
  theme(axis.text.x=element_text(angle = -90, hjust = 0))

Imgur

, y-, ..

+5

A baseR solution (or almost: it uses reshape2):

First you create the variables "type"and "stat"then splityours data.frameby the day, then reshapeyour data.frame should have the necessary form and, finally, plotwith barplot(I allow you to configure barplot). You can save the days (and use them later as the main name) by changing the call bit lapplyusing the names list.

df$type <- sub("([^.]+)\\..+", "\\1", df$variable)
df$stat <- sub("[^.]+\\.(.+)", "\\1", df$variable)

l_df <- split(df, df$Day)
library(reshape2)
par(mfrow=c(2, 1))
lapply(l_df, function(df_day){
                df_resh <- dcast(type~stat, value.var="value", data=df_day)
                row.names(df_resh) <- df_resh$type
                barplot(t(df_resh[, -1]), beside=TRUE, legend=TRUE, col=c("green", "blue", "red"))})

+4
source

You can try

library(stringr)
df$var1 <-  unlist(lapply(str_split(df$variable, "[.]"), "[", 1))
df$var2 <-  unlist(lapply(str_split(df$variable, "[.]"), "[", 2))  
ggplot(df, aes( x=var2, y= value)) + geom_bar(stat = 'identity') + facet_wrap(var1 ~ Day, scales = "free_y")

+3
source

Source: https://habr.com/ru/post/1669906/


All Articles