Calculate the number of files in a folder in a complex folder structure?

I created a simple data.tree by importing a folder structure together with the files inside it.

# Install pacman on first use. requireNamespace() only checks that the package
# is available (it does not attach it), which is all that is needed here
# because pacman is called through the `pacman::` prefix.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
# pathr comes from the GitHub repository trinker/pathr.
pacman::p_load_gh("trinker/pathr")

library(pathr)
library(data.tree)

# Import the folder hierarchy, files included, as a data.tree object.
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
folder_structure <- pathr::tree(
  path = "/Users/username/Downloads/top_level/",
  use.data.tree = TRUE,
  include.files = TRUE
)

Now I would like to convert the object folder_structure to a data.frame with one row per folder and a column that gives the number of files each folder contains. How can I do this?

For example, I have this very simple folder structure:

top_level_folder
    sub_folder_1
        file1.txt
    sub_folder_2
        file2.txt

An answer to the question should produce output that looks like this:

Folders             Files
top_level_folder    0
sub_folder_1        1
sub_folder_2        1

list.dirs("/Users/username/Downloads/top_level/"), , . , , , (.. top_level_folder 0, top_level_folder 2 ).

, , Rails: https://github.com/rails/rails/archive/master.zip Rails .

+4
6

list.dirs() returns a vector of paths to every directory reachable from a starting folder, subdirectories included.

# Starting folder, followed by the paths of that folder and of every
# directory nested beneath it
dir <- "."
xs <- list.dirs(path = dir, full.names = TRUE, recursive = TRUE)

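list.files() lists the contents of a single folder, but that listing contains subdirectories as well as files. file.info() reports, for each path, whether it is a directory, so we can use it to tell the two apart.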

# Predicates that classify paths as directories or as plain files.
# Both are vectorised over `x` and rely on the `isdir` column of file.info().
is_dir <- function(x) {
  file.info(x)$isdir
}

# A path counts as a file exactly when it is not a directory.
is_file <- function(x) {
  !is_dir(x)
}

, . TRUE.

# Number of entries directly inside `dir` that are files rather than folders.
# Summing the logical vector from is_file() counts its TRUE values.
count_files_in_one_dir <- function(dir) {
  sum(is_file(list.files(dir, full.names = TRUE)))
}

, .

# Apply count_files_in_one_dir() to every element of `dir` and return the
# counts as an unnamed numeric vector of the same length.
count_files_in_dir <- function(dir) {
  counts <- vapply(
    X = dir,
    FUN = count_files_in_one_dir,
    FUN.VALUE = numeric(1)
  )
  unname(counts)
}

.

# One row per directory in `xs`, with the number of files directly inside it.
# tibble::tibble() replaces tibble::data_frame(), which is deprecated.
df <- tibble::tibble(
  dir = xs,
  nfiles = count_files_in_dir(xs)
)

df
#> # A tibble: 688 x 2
#>                                                  dir nfiles
#>                                                <chr>  <dbl>
#>  1                                                 .     11
#>  2                                         ./.github      3
#>  3                                     ./actioncable      7
#>  4                                 ./actioncable/app      0
#>  5                          ./actioncable/app/assets      0
#>  6              ./actioncable/app/assets/javascripts      1
#>  7 ./actioncable/app/assets/javascripts/action_cable      5
#>  8                                 ./actioncable/bin      1
#>  9                                 ./actioncable/lib      1
#> 10                    ./actioncable/lib/action_cable      8
#> # ... with 678 more rows
+3

dplyr parse_path() pathr. tree parse_path, parse_path . . :

library(pathr)
library(dplyr)

# List every file under the root, split each path into its components, keep
# the component at the chosen depth and count how many files share it.
fls <- dir("C:/RBuildTools/3.3", recursive = TRUE, full.names = TRUE) %>%
  parse_path() %>%
  index(4) %>% # this is where you indicate the level or "depth"
               # of the folder of which want subfolder file counts
  data.frame(folders = .) %>%
  group_by(folders) %>%
  tally() %>%
  arrange(n)

# if you want to get rid of all the files in your starting folder
# just add a
# filter(n > 1) at the end of the dplyr chain
# (the count column created by tally() is `n`; note that this also drops
# any subfolder that contains exactly one file)

:

> fls
# A tibble: 12 × 2
        folders     n
         <fctr> <int>
1       COPYING     1
2    README.txt     1
3    Rtools.txt     1
4  unins000.dat     1
5  unins000.exe     1
6   VERSION.txt     1
7           bin    56
8    mingw_libs   200
9      texinfo5   356
10    gcc-4.6.3  3787
11     mingw_32 13707
12     mingw_64 14619
+1
# Build the example tree from the question: two subfolders, one file each.
dir.create("top_level_folder")
dir.create("top_level_folder/sub_folder_1")
dir.create("top_level_folder/sub_folder_2")
a <- "hello"
save(a, file = "top_level_folder/sub_folder_1/file1.txt")
save(a, file = "top_level_folder/sub_folder_2/file2.txt")

# Count the files that sit directly inside `path` and inside each of its
# subdirectories (counts are per folder, not cumulative).
#
# path: directory to scan.
# Returns a data.frame with one row per directory: `Folders` holds the
# directory's path relative to `path` (the top folder is labelled with its
# own base name) and `Files` holds the integer file count.
count_files_by_folder <- function(path) {
  # Relative paths of every directory; the top folder itself comes back as "".
  rel_dirs <- list.dirs(path, full.names = FALSE, recursive = TRUE)

  # Relative parent directory of every file. dirname() reports "." for files
  # directly inside `path`, so map that onto the "" used by list.dirs().
  parents <- dirname(list.files(path, recursive = TRUE))
  parents[parents == "."] <- ""

  # Matching on the full relative path (not just the leaf name) keeps nested
  # folders and same-named folders in different branches apart. tabulate()
  # yields 0 for directories that contain no files.
  labels <- rel_dirs
  labels[rel_dirs == ""] <- basename(path)
  data.frame(
    Folders = labels,
    Files = tabulate(match(parents, rel_dirs), nbins = length(rel_dirs)),
    stringsAsFactors = FALSE
  )
}

path <- "top_level_folder"
output <- count_files_by_folder(path)

#            Folders Files
# 1 top_level_folder     0
# 2     sub_folder_1     1
# 3     sub_folder_2     1
+1

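Iterate over the output of list.dirs (which defaults to recursive = TRUE) and call list.files (which defaults to recursive = FALSE) on each directory. To collect the results in a data.frame,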

library(purrr)

# One row per directory under the first library path: the directory's path
# and the number of entries that list.files() reports for it.
# NOTE: a non-recursive list.files() also returns subdirectory names, so the
# `files` column counts subfolders as well as files.
files <- .libPaths()[1] %>%    # omit for current directory or supply alternate path
  list.dirs() %>%
  map_df(function(dir_path) {
    list(
      path = dir_path,
      files = length(list.files(dir_path))
    )
  })

files
#> # A tibble: 4,457 x 2
#>                                                                           path files
#>                                                                          <chr> <int>
#>  1              /Library/Frameworks/R.framework/Versions/3.4/Resources/library   314
#>  2        /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind     9
#>  3   /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/help     5
#>  4   /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/html     2
#>  5   /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/Meta     6
#>  6      /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/R     3
#>  7      /Library/Frameworks/R.framework/Versions/3.4/Resources/library/acepack    14
#>  8 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/acepack/help     5
#>  9 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/acepack/html     2
#> 10 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/acepack/libs     2
#> # ... with 4,447 more rows

, ,

# Base-R equivalent: one row per directory under the first library path, with
# the number of entries list.files() reports for it (subfolders included).
files <- local({
  dirs <- list.dirs(.libPaths()[1])
  n_entries <- vapply(
    dirs,
    function(d) length(list.files(d)),
    integer(1),
    USE.NAMES = FALSE
  )
  data.frame(path = dirs, files = n_entries, stringsAsFactors = FALSE)
})

head(files)
#>                                                                        path files
#> 1            /Library/Frameworks/R.framework/Versions/3.4/Resources/library   314
#> 2      /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind     9
#> 3 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/help     5
#> 4 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/html     2
#> 5 /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/Meta     6
#> 6    /Library/Frameworks/R.framework/Versions/3.4/Resources/library/abind/R     3
+1

:

# Number of direct children of `node` that are leaves of the tree.
count_leaf_children <- function(node) {
  sum(Get(node$children, "isLeaf"))
}

# Print only the non-leaf nodes, adding a `files` column computed per node.
print(
  folder_structure,
  files = count_leaf_children,
  filterFun = isNotLeaf,
  pruneMethod = NULL
)

- :

                                                     levelName files
1   data.tree                                                     16
2    ¦--data                                                       2
3    ¦--data_gen                                                   2
4    ¦--.git                                                       8
5    ¦   ¦--hooks                                                  9
6    ¦   ¦--info                                                   1
7    ¦   ¦--logs                                                   1
8    ¦   ¦   °--refs                                               1
9    ¦   ¦       ¦--heads                                          4
10   ¦   ¦       ¦--remotes                                        0
11   ¦   ¦       ¦   °--origin                                     5
12   ¦   ¦--objects                                                0
13   ¦   ¦   ¦--01                                                 4
14   ¦   ¦   ¦--02                                                 5
...

, .

+1

With include.dirs = TRUE and recursive = TRUE, list.files returns the paths of all files and directories. There is no is.file function, but there is dir.exists. Since we know that every returned path exists, any path that is not a directory can be treated as a file.

top_level <- "~/rails-master"

# Work relative to an explicit root instead of calling setwd(), so the
# session's working directory is left untouched.
root <- path.expand(top_level)
if (!dir.exists(root)) {
  warning("Directory does not exist: ", top_level, call. = FALSE)
}

# Every file and directory below the root, as paths relative to the root.
subitems <- data.frame(
  path = list.files(
    root,
    include.dirs = TRUE,
    recursive    = TRUE
  ),
  stringsAsFactors = FALSE
)

# All listed paths exist, so anything that is not a directory is a file.
# The paths are relative, hence the root is prepended for the check.
subitems$is_file <- !dir.exists(file.path(root, subitems$path))

For each row: if the path is a directory, it is its own directory path; if the path is a file, its directory path is its parent. Then it is just a matter of counting how often is_file is TRUE for each directory path.

# Directory each row belongs to: a file belongs to its parent directory,
# a directory belongs to itself.
subitems[["dir_path"]] <- with(
  subitems,
  ifelse(test = is_file, yes = dirname(path), no = path)
)

# Summing is_file within each directory path gives that directory's file
# count; rows that are directories contribute 0.
file_counts <- tapply(
  X     = subitems[["is_file"]],
  INDEX = subitems[["dir_path"]],
  FUN   = sum
)

result <- data.frame(Folders = names(file_counts), Files = file_counts)
0
source

Source: https://habr.com/ru/post/1679104/


All Articles