Plot many csv files in one window

I have a list of 701 csv data files. Each of them has the same number of columns (7), but a different number of rows (between 25000 and 28000).

Here is an excerpt from the first file:

Date,Week,Week Day,Hour,Price,Volume,Sale/Purchase
18/03/2011,11,5,1,-3000.00,17416,Sell
18/03/2011,11,5,1,-1001.10,17427,Sell
18/03/2011,11,5,1,-1000.00,18055,Sell
18/03/2011,11,5,1,-500.10,18057,Sell
18/03/2011,11,5,1,-500.00,18064,Sell
18/03/2011,11,5,1,-400.10,18066,Sell
18/03/2011,11,5,1,-400.00,18066,Sell
18/03/2011,11,5,1,-300.10,18068,Sell
18/03/2011,11,5,1,-300.00,18118,Sell

Now I am trying to plot Volume against Date on the condition that Price is exactly 200.00. I then want a single plot window in which I can see how the volume develops over time.

# List the files in the working directory whose names match the pattern.
allenamen <- dir(pattern="*.csv")
# Read every file into its own data frame; alledat is a list of data frames.
alledat <- lapply(allenamen, read.csv, header = TRUE, 
   sep = ",", stringsAsFactors = FALSE)
# Plot Volume against Date for the rows of `a` selected by the Price filter,
# with fixed axis limits.
# NOTE(review): `a` is passed positionally in addition to `data = ...`; the
# extra unnamed argument presumably ends up as the `log` argument of the
# underlying plot call, which would explain the reported
# "strsplit(log, NULL): non-character argument" error -- confirm.
# NOTE(review): Price is compared with the string "200.00"; if Price is read
# as a number this looks unlikely to select any rows -- verify.
# NOTE(review): Date is read as character here and is not converted with
# as.Date() before being plotted against a Date-valued xlim -- verify.
verlauf <- function(a) {plot(Volume ~ Date, a, 
  data=subset(a, (Price=="200.00")), 
  ylim = c(15000, 45000), 
  xlim = as.Date(c("2011-12-30", "2013-01-20")), type = "l")}
# Call verlauf once for each data frame in the list.
lapply(alledat, verlauf)

But I get this error:

error in strsplit(log, NULL): non-character argument

How can I avoid the error?

+4
source share
3 answers

If you want to combine all the subsets for Price==200 into a single graph, you can use the following function:

# Plot Volume against Date, in a single graph, for all rows across the CSV
# files in the working directory whose Price equals `x`.
#
# x: price to select; compared with `==` against each file's Price column.
#
# Called for its side effect (the plot). Stops with an error when the working
# directory contains no CSV files.
plotprice <- function(x) {
  # `pattern` is a regular expression, not a glob: "*.csv" would also match
  # names that merely contain "csv" after some other character.
  files <- list.files(pattern = "\\.csv$")
  if (length(files) == 0L) {
    stop("no CSV files found in ", getwd(), call. = FALSE)
  }
  # Filter each file as soon as it is read and bind the pieces once, rather
  # than growing a data frame with rbind() on every iteration.
  pieces <- lapply(files, function(f) {
    xx <- read.csv(f)
    # Explicit indexing (instead of subset()) so that a CSV column that
    # happens to be called `x` cannot shadow the function argument.
    xx[which(xx$Price == x), , drop = FALSE]
  })
  df <- do.call(rbind, pieces)
  # Dates are stored as day/month/year text in the files.
  df$Date <- as.Date(as.character(df$Date), format = "%d/%m/%Y")
  plot(Volume ~ Date, df,
       ylim = c(15000, 45000),
       xlim = as.Date(c("2011-12-30", "2013-01-20")),
       type = "l")
}

Calling plotprice(200) then plots all the rows with Price==200.


If you want a separate plot for each csv file instead, use:

# Draw one Volume-against-Date plot per CSV file in the working directory,
# using only the rows of that file whose Price equals `x`.
#
# x: price to select; compared with `==` against each file's Price column.
#
# Called for its side effect (the plots); returns NULL invisibly. Does
# nothing when the working directory contains no CSV files.
ploteach <- function(x) {
  # `pattern` is a regular expression, not a glob: "*.csv" would also match
  # names that merely contain "csv" after some other character.
  files <- list.files(pattern = "\\.csv$")
  # Iterating over the names themselves is safe for an empty vector, unlike
  # 1:length(files), which yields c(1, 0).
  for (f in files) {
    df <- read.csv(f)
    # Explicit indexing (instead of subset()) so that a CSV column that
    # happens to be called `x` cannot shadow the function argument.
    df <- df[which(df$Price == x), , drop = FALSE]
    # Dates are stored as day/month/year text in the files.
    df$Date <- as.Date(as.character(df$Date), format = "%d/%m/%Y")
    plot(Volume ~ Date, df,
         ylim = c(15000, 45000),
         xlim = as.Date(c("2011-12-30", "2013-01-20")),
         type = "l")
  }
  invisible(NULL)
}

# Example call: one plot per csv file, using the rows where Price equals 200.
ploteach(200)
+2

A couple of notes:

  • Use list.files rather than dir; dir is only an alias for list.files, so the longer name states the intent more clearly.

  • header = TRUE and sep = "," are already the defaults of read.csv, so they do not need to be passed.

Here is how I would do it:

# List the csv files. `pattern` is a regular expression, so anchor on the
# ".csv" extension instead of using the glob "*.csv".
fnames <- list.files(pattern = "\\.csv$")
# Read each file and keep only its Price == 200 rows, so the full contents of
# the files are never all held at once.
read <- lapply(fnames, function(x) {
  rd <- read.csv(x, stringsAsFactors = FALSE)
  subset(rd, Price == 200)
})
# Stack the per-file subsets into a single data frame.
dat <- do.call(rbind, read)

The combined data is then in dat.

+2

The problem is that lapply with read.csv loads all 701 csv files into memory in full.

To avoid that, subset each file as soon as it is read:

#
# Read one csv file and keep only the rows at a given price, so that the full
# contents of the file do not have to stay in memory.
#
# dateiname: path of the csv file; it is expected to have a Price column.
# price:     price to keep; defaults to 200, the value used originally.
#
# Returns a data frame holding the matching rows. Prints the gc() report as a
# side effect.
read.subset <- function(dateiname, price = 200) {
  a <- read.csv(file = dateiname, header = TRUE, sep = ",",
                stringsAsFactors = FALSE)
  # which() discards NA comparisons; indexing with the raw logical vector
  # would turn every missing Price into an all-NA row.
  a <- a[which(a$Price == price), , drop = FALSE]
  print(gc())    # run the garbage collector and report memory use per file
  a
}

Edit 2: a faster version of read.subset

# Read one csv file with scan() and keep only the rows at a given price, so
# that the full contents of the file do not have to stay in memory.
#
# The file is read as seven columns: one character column, five numeric
# columns and one character column, in that order.
#
# dateiname: path of the csv file.
# price:     price to keep; defaults to 200, the value used originally.
#
# Returns a data frame holding the matching rows; column names are taken
# verbatim from the header line. Prints the gc() report as a side effect.
read.subset.fast <- function(dateiname, price = 200) {
  # the first line holds the column names
  spalten <- scan(file   = dateiname,
                  what   = character(),
                  skip   = 0,
                  nlines = 1,
                  sep    = ",")
  # read the data lines into a list with one vector per column
  a <- scan(file = dateiname,
            what = c(list(character()),
                     rep(list(numeric()), 5),
                     list(character())),
            skip = 1,  # skip the header line
            sep  = ",")
  # turn the list into a data frame by setting its attributes directly,
  # which avoids the copying done by data.frame()
  attributes(a) <- list(class     = "data.frame",
                        row.names = c(NA_integer_, length(a[[1]])),
                        names     = spalten)
  # which() discards NA comparisons; indexing with the raw logical vector
  # would turn every missing Price into an all-NA row.
  a <- a[which(a$Price == price), , drop = FALSE]
  print(gc())    # run the garbage collector and report memory use per file
  a
}
#

Then read all the files:

#
# `pattern` is a regular expression, so match the ".csv" extension explicitly
# instead of using the glob "*.csv"
allenamen <- list.files(pattern = "\\.csv$") # updated (@Richard Scriven)
# read and subset each file, then bind the pieces into a single data frame
# instead of keeping a list of 701 data frames
alledat <- do.call(rbind, lapply(allenamen, read.subset.fast))
#

Convert date to right format:

# Convert the Date column from day/month/year text (e.g. "18/03/2011") to
# Date values, so it can be used on a time axis.
alledat$Date <- as.Date(as.character(alledat$Date), format="%d/%m/%Y")

Then you are good to go; no function is required. Just call:

# Line plot of Volume over Date for the combined data; both axis ranges are
# computed from the columns of `alledat`.
plot(
  Volume ~ Date,
  data = alledat,
  type = "l",
  xlim = range(Date),
  ylim = range(Volume)
)
0
source

Source: https://habr.com/ru/post/1542453/


All Articles