A data frame in which elements are not single columns

I have been using R for a long time, so I can't say "hello, I'm new, explain it to me." But that is essentially what I would like to ask, because I run into this problem from time to time, and each time I fail to solve it and move on to something else. Today I'm curious enough to ask.

I think of a data frame as a collection of columns that all have the same length. However, I know this is wrong, because matrices (elements with multiple columns) can be inserted into a data frame. When I accidentally do this, I get an object that does not print on the screen the way I expect. There are two problems:

  • An apparent inconsistency between the column names that head reports and the names the data frame actually has, and

  • I can’t find a definitive way to ask a data frame: “Are you an ordinary, one-column-per-variable data frame?” or “Do you contain some of these frustrating internal structures that make life difficult?”

You can see what I mean if you run

 example(predict.lm)

This runs the examples for the predict method and creates an output matrix called pt.

Then change the last step of the example: instead of leaving the matrix output as a free-standing object, add it to the data frame named npk:

npk$predict <- predict(npk.aov, type = "terms")

After that, what is npk? Is it still a data frame? Yes

> is.data.frame(npk)
  [1] TRUE

Hmm, notice how head reports the column names:

> head(npk)
  block N P K yield predict.block  predict.N  predict.P
1     1 0 1 1  49.5    -0.8500000 -4.9250000  0.2083333
2     1 1 1 0  62.8    -0.8500000  4.9250000  0.2083333
3     1 0 0 0  46.8    -0.8500000 -4.9250000 -0.2083333
4     1 1 0 1  57.0    -0.8500000  4.9250000 -0.2083333
5     2 1 0 0  59.8     2.5750000  4.9250000 -0.2083333
6     2 1 1 1  58.5     2.5750000  4.9250000  0.2083333
 predict.K predict.N:P predict.N:K predict.P:K
1 -0.9583333   0.9416667   1.1750000   0.4250000
2  0.9583333  -2.8250000   1.1750000  -0.1416667
3  0.9583333   0.9416667   1.1750000  -0.1416667
4 -0.9583333   0.9416667  -3.5250000  -0.1416667
5  0.9583333   0.9416667   1.1750000  -0.1416667
6 -0.9583333  -2.8250000  -3.5250000   0.4250000
  predict.N:P:K
1     0.0000000
2     0.0000000
3     0.0000000
4     0.0000000
5     0.0000000
6     0.0000000

This makes it look like there are columns named "predict.block" or "predict.P", but there are not:

> colnames(npk)
[1] "block"   "N"       "P"       "K"       "yield"  
[6] "predict"

The colnames function would be more appropriately named column_or_whatever_else_we_find_here.

> npk$predict.P
NULL

To get at the "P" column, you have to index into the matrix stored in predict:

> npk$predict[ , "P"] 
         1          2          3          4          5 
 0.2083333  0.2083333 -0.2083333 -0.2083333 -0.2083333  
         6          7          8          9         10 
 0.2083333 -0.2083333  0.2083333  0.2083333  0.2083333 
        11         12         13         14         15 
-0.2083333 -0.2083333 -0.2083333  0.2083333 -0.2083333 
        16         17         18         19         20 
 0.2083333  0.2083333 -0.2083333 -0.2083333  0.2083333 
        21         22         23         24 
-0.2083333  0.2083333  0.2083333 -0.2083333 

, , , .

To get the terms into npk as ordinary columns instead, merge the matrix in by row names:

> npk.new <- merge(npk, pt, by = "row.names", 
                   suffixes = c("", ".predict"))
> colnames(npk.new)
 [1] "Row.names"     "block"         "N"            
 [4] "P"             "K"             "yield"        
 [7] "block.predict" "N.predict"     "P.predict"    
 [10] "K.predict"     "N:P"           "N:K"          
 [13] "P:K"           "N:P:K"        

, , , , , " ".

: " , , ?" , . , .

, , "" . :

> sapply(npk, is.atomic)
  block       N       P       K   yield predict 
  TRUE    TRUE    TRUE    TRUE    TRUE    TRUE 
> sapply(npk, is.vector)
  block       N       P       K   yield predict 
  FALSE   FALSE   FALSE   FALSE    TRUE   FALSE 

is.matrix, on the other hand, does single out the matrix column:

> sapply(npk, is.matrix)
  block       N       P       K   yield predict 
  FALSE   FALSE   FALSE   FALSE   FALSE    TRUE 

, , , " ", " ", "". , , .

+4
2

One option is a helper that accepts only plain vectors and factors:

# Is `x` a "simple" data-frame column, i.e. a plain vector or a factor?
# A matrix column fails both checks: it is not a factor, and is.vector()
# is FALSE for anything carrying attributes other than names (such as dim).
is.simple <- function(x) {
  # Both predicates return a single logical, so use scalar `||`, not `|`.
  is.vector(x) || is.factor(x)
}

sapply(npk, is.simple)

Alternatively, check that the column has no dimensions:

# TRUE when `x` has no dimensions, i.e. dim(x) is NULL.
no.dims <- function(x) {
  d <- dim(x)
  is.null(d)
}
sapply(npk, no.dims)

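To avoid the matrix column in the first place, add the predictions to npk with cbind, which expands the matrix into ordinary columns: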

npk <- cbind(npk, predict = predict(npk.aov, type = "terms"))

. .

+4

I. class sapply()

   npk$predict <- predict(npk.aov, type = "terms")
   sapply(npk,class)
   #     block         N         P         K     yield   predict 
   #  "factor"  "factor"  "factor"  "factor" "numeric"  "matrix" 

II. dim sapply()

   sapply(npk,dim)
   # $block
   # NULL

   # $N
   # NULL

   # $P
   # NULL

   # $K
   # NULL

   # $yield
   # NULL

   # $predict
   # [1] 24  8

III. colnames sapply()

   sapply(npk,colnames)
   # $block
   # NULL

   # $N
   # NULL

   # $P
   # NULL

   # $K
   # NULL

   # $yield
   # NULL

   # $predict
   # [1] "block" "N"     "P"     "K"     "N:P"   "N:K"   "P:K"   "N:P:K"

IV. attributes sapply()

   sapply(npk,attributes)
   # $block
   # $block$levels
   # [1] "1" "2" "3" "4" "5" "6"

   # $block$class
   # [1] "factor"


   # $N
   # $N$levels
   # [1] "0" "1"

   # $N$class
   # [1] "factor"


   # $P
   # $P$levels
   # [1] "0" "1"

   # $P$class
   # [1] "factor"


   # $K
   # $K$levels
   # [1] "0" "1"

   # $K$class
   # [1] "factor"


   # $yield
   # NULL

   # $predict
   # $predict$dim
   # [1] 24  8

   # $predict$dimnames
   # $predict$dimnames[[1]]
   #  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13"  "14" "15"
   # [16] "16" "17" "18" "19" "20" "21" "22" "23" "24"

   # $predict$dimnames[[2]]
   # [1] "block" "N"     "P"     "K"     "N:P"   "N:K"   "P:K"   "N:P:K"


   # $predict$constant
   # [1] 54.875

V. npk

   class(npk)
   # [1] "data.frame"

   mode(npk)
   # [1] "list"

   typeof(npk)
   # [1] "list"

   attributes(npk)
   # $names
   # [1] "block"   "N"       "P"       "K"       "yield"   "predict"

   # $row.names
   #  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24

   # $class
   # [1] "data.frame"


   str(npk)
   # 'data.frame':   24 obs. of  6 variables:
   #  $ block  : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 2 2 2 2 3 3  ...
   #  $ N      : Factor w/ 2 levels "0","1": 1 2 1 2 2 2 1 1 1 2 ...
   #  $ P      : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 1 2 2 2 ...
   #  $ K      : Factor w/ 2 levels "0","1": 2 1 1 2 1 2 2 1 1 2 ...
   #  $ yield  : num  49.5 62.8 46.8 57 59.8 58.5 55.5 56 62.8 55.8 ...
   #  $ predict: num [1:24, 1:8] -0.85 -0.85 -0.85 -0.85 2.57 ...
   #  ..- attr(*, "dimnames")=List of 2
   #   .. ..$ : chr  "1" "2" "3" "4" ...
   #   .. ..$ : chr  "block" "N" "P" "K" ...
   #   ..- attr(*, "constant")= num 54.9


   dim(npk)
   # [1] 24  6

   dimnames(npk)
   # [[1]]
   #  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
   # [16] "16" "17" "18" "19" "20" "21" "22" "23" "24"

   # [[2]]
   # [1] "block"   "N"       "P"       "K"       "yield"   "predict"


  summary.default(npk)
  #         Length    Class    Mode   
  #  block      24    factor numeric
  #  N          24    factor numeric
  #  P          24    factor numeric
  #  K          24    factor numeric
  #  yield      24    -none- numeric
  #  predict   192    -none- numeric

VI. Bonus: describe() from the psych package

  library(psych)
  list(npk_1_5 = describe(npk[,-6]),npk_6 = describe(npk[,6]))
  # $npk_1_5
  #        vars  n  mean   sd median trimmed  mad  min  max range skew kurtosis   se
  # block*    1 24  3.50 1.74   3.50    3.50 2.22  1.0  6.0   5.0 0.00    -1.41 0.36
  # N*        2 24  1.50 0.51   1.50    1.50 0.74  1.0  2.0   1.0 0.00    -2.08 0.10
  # P*        3 24  1.50 0.51   1.50    1.50 0.74  1.0  2.0   1.0 0.00    -2.08 0.10
  # K*        4 24  1.50 0.51   1.50    1.50 0.74  1.0  2.0   1.0 0.00    -2.08 0.10
  # yield     5 24 54.88 6.17  55.65   54.75 6.15 44.2 69.5  25.3 0.24    -0.51 1.26

  # $npk_6
  #       vars  n mean   sd median trimmed  mad   min  max range  skew kurtosis   se
  # block    1 24    0 3.86   0.31   -0.11 5.13 -4.75 5.90 10.65  0.10    -1.41 0.79
  # N        2 24    0 5.03   0.00    0.00 7.30 -4.92 4.92  9.85  0.00    -2.08 1.03
  # P        3 24    0 0.21   0.00    0.00 0.31 -0.21 0.21  0.42  0.00    -2.08 0.04
  # K        4 24    0 0.98   0.00    0.00 1.42 -0.96 0.96  1.92  0.00    -2.08 0.20
  # N:P      5 24    0 1.67   0.94    0.19 0.00 -2.82 0.94  3.77 -1.08    -0.86 0.34
  # N:K      6 24    0 2.08   1.17    0.23 0.00 -3.52 1.17  4.70 -1.08    -0.86 0.42
  # P:K      7 24    0 0.25  -0.14   -0.03 0.00 -0.14 0.43  0.57  1.08    -0.86 0.05
  # N:P:K    8 24    0 0.00   0.00    0.00 0.00  0.00 0.00  0.00   NaN      NaN 0.00
0

Source: https://habr.com/ru/post/1650953/


All Articles