Problems with iGraph and disparityfilter with characters and large numbers

After not receiving a response from the authors of the package I discuss below, I thought that someone here could shed some light on this problem.

I work with a large dataset that contains Origin-Destination pairs and the corresponding number of passengers travelling from A to B. The Origin and Destination variables are encoded using the IATA airport code (3 letters). The original CSV files can be found here: https://github.com/FilipeamTeixeira/network . Please note that all 3 CSV files are the same, except that one encodes the ORIGIN / DEST variables as characters, another as numbers, and the third as large numbers. For network purposes, however, they are exactly the same, as they provide the same number of connections.

    ORIGIN  DEST    weight
     ABE    ATL     1530
     ABE    AVP     6
     ABE    BDL     2
     ABE    BOS     1
     ABE    BWI     3
     ABE    CLT     1053

After importing the files, I create a new graph with a <- graph_from_data_frame(netchr, directed = TRUE).

I then use the disparityfilter package, https://github.com/alessandrobessi/disparityfilter/blob/master/R/disparity_filter.R, to extract the backbone of the network by running

backbone(a).

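However, when Origin and Destination are stored as characters, it finds 0 edges. The large-number version also finds 0, even though all three files describe the same network.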

, .

# Load igraph and the disparityfilter package
library(igraph)
library(disparityfilter)

# Import the three edge lists (ORIGIN / DEST as characters, as numbers, and
# as large numbers). read.csv() already defaults to header = TRUE, sep = ",".
netchr <- read.csv("netchr.csv", stringsAsFactors = FALSE)
netnumber <- read.csv("netnum.csv", stringsAsFactors = FALSE)
netnumber2 <- read.csv("netnum2.csv", stringsAsFactors = FALSE)

# Build one directed graph per edge list
a <- graph_from_data_frame(netchr, directed = TRUE)
b <- graph_from_data_frame(netnumber, directed = TRUE)
c <- graph_from_data_frame(netnumber2, directed = TRUE)

# Extract the backbone of each graph
backbone(a) # finds 0
backbone(b) # has results
backbone(c) # finds 0

, , iGraph , , .

+4
1

​​ disparityfilter. disparity_filter, backbone(), (), , , , node node . , , ( b ) , , , - .

- , , , : , .

, , . , "" ( ), "" ( , , t , ).

, , igraphdata:

# Packages: igraph for the graph objects, disparityfilter for backbone(),
# igraphdata for the example network
library(igraph)
library(disparityfilter)
library(igraphdata)

# Example data: the enron email network
data("enron")

# Flatten the graph into an edge data frame
df <- igraph::as_data_frame(enron, what = "edges")
summary(df) # we see nodes numbered from 1:184
#>       from             to          Time            Reciptype        
#>  Min.   :  1.0   Min.   :  1   Length:125409      Length:125409     
#>  1st Qu.: 64.0   1st Qu.: 64   Class :character   Class :character  
#>  Median :108.0   Median :113   Mode  :character   Mode  :character  
#>  Mean   :105.4   Mean   :108                                        
#>  3rd Qu.:156.0   3rd Qu.:156                                        
#>  Max.   :184.0   Max.   :184                                        
#>      Topic         LDC_topic     
#>  Min.   :0.000   Min.   :-1.000  
#>  1st Qu.:1.000   1st Qu.: 0.000  
#>  Median :1.000   Median : 0.000  
#>  Mean   :1.711   Mean   : 2.572  
#>  3rd Qu.:3.000   3rd Qu.: 1.000  
#>  Max.   :3.000   Max.   :32.000

# Use the Topic column as the edge weight
df[["weight"]] <- df[["Topic"]]

# Character variant: prefix every node id with "char"
dfchar <- df
dfchar[c("from", "to")] <- lapply(
  df[c("from", "to")],
  function(id) paste0("char", id)
)

# Large-number variant: multiply every node id by 1000
dfbnum <- df
dfbnum[c("from", "to")] <- lapply(
  df[c("from", "to")],
  function(id) 1000 * id
)

data.frames

# Rebuild a directed graph from each of the three edge data frames
smallnum <- graph_from_data_frame(d = df, directed = TRUE)
chars <- graph_from_data_frame(d = dfchar, directed = TRUE)
bignum <- graph_from_data_frame(d = dfbnum, directed = TRUE)

Running backbone() on each of these graphs gives the following:

## Now we document what you found: namely the anomalous behavior of backbone
# Graph built from the original edge list: 231 rows come back
newbbs <- backbone(smallnum)
dim(newbbs)
#> [1] 231   4

# Graph with "char"-prefixed node names: no rows come back
newbbc <- backbone(chars) 
dim(newbbc)
#> [1] 0 4

# Graph with node names multiplied by 1000: no rows come back
newbbb <- backbone(bignum)
dim(newbbb)
#> [1] 0 4

So, it appears that backbone() only returns results when the nodes are named 1:N.

, . backbone()? , , :

# Quick demonstration of the indexing issue: shift every node name by 90
# (about half the number of nodes) and see what backbone() returns.
dfmnum <- df
dfmnum[c("from", "to")] <- lapply(
  df[c("from", "to")],
  function(id) id + 90
)

# Rebuild the graph (directed is the default) and extract its backbone
midnum <- graph_from_data_frame(dfmnum, directed = TRUE)
bbmid <- backbone(midnum)
dim(bbmid)
#> [1] 28  4

So, instead of 231 edges, we now get only 28, even though only the node names changed.

?

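Looking at disparity_filter.R in the disparityfilter GitHub repository, we can see what happens. At line 58, disparity_filter(), which is called by backbone(), converts the graph into an edge data frame. For the "chars" graph it looks like this: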

# Edge data frame of the character-named graph, first two columns only
e <- igraph::as_data_frame(chars, what = "edges")[, 1:2]
head(e)
    from      to
1 char25 char154
2 char25 char154
3 char30  char30
4 char30  char30
5 char30  char30
6 char30  char30

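Here, the from and to columns contain the node names. Then, at line 63, disparity_filter() loops over the nodes whose degree (d) is greater than 1 with for (u in which(d > 1)). The switch statement at line 65 then uses u: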

# Excerpt quoted from disparity_filter(): pick the rows of the edge list `e`
# that involve `u`, dispatching on the first letter of `mode`.
#   a: rows where either column equals u
#   i: rows where the second column equals u
#   o: rows where the first column equals u
# NOTE(review): `u` comes from which(d > 1), i.e. it is a position, while the
# columns of `e` hold the from/to values, so the comparison only matches when
# those values coincide with positions.
w = switch(substr(mode, 1, 1),
      a = which(e[, 1] == u | e[, 2] == u),
      i = which(e[, 2] == u),
      o = which(e[, 1] == u)
)

, node . , chars , u 1, node char25. char25 e[,1], , , , . switch(), 76. , , , .

?

, , , ? , u e[,1]. , "":

# Degree of every node, then the positions whose degree exceeds 1
d <- igraph::degree(smallnum)
which(d > 1)
 25  30  39  52  61  64  66  67  93 100 115 125 138 141 146 156 164 168 170 
  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 

, ! , - , . , ; , :

# Rename the vertices 1..N in vertex order, then rerun backbone() on the
# renamed graph. set_vertex_attr() replaces the deprecated
# set.vertex.attribute(), and seq_len(vcount()) stays correct for an empty
# graph, where 1:length(V(g)) would yield c(1, 0).
renamed <- set_vertex_attr(
  smallnum,
  "name",
  value = seq_len(vcount(smallnum))
)
bbs_problem_revealed <- backbone(renamed)
dim(bbs_problem_revealed)
[1] 9 4

With the nodes renamed in this way, backbone() returns only 9 edges!

, , , . github.

+3

Source: https://habr.com/ru/post/1674720/


All Articles