Determining whether two columns contain a negative and a positive value in R

Here is the code to generate some sample data:

set.seed(1000)
dat <- data.frame(A = runif(5, -5, 5),
                  B = runif(5, -5, 5),
                  C = runif(5, -5, 5))

Data:

          A          B         C
1 -1.721213 -4.3226204 -1.500625
2  2.588465  2.3871486  2.554616
3 -3.860636  0.8353505 -1.829158
4  1.907551 -2.8422860  3.658128
5  0.164024 -2.4387760  2.641607

I want to compare each column and return TRUEeither FALSEdepending on whether there is a negative or a positive value when we go down the line. For example, in row 4 of columns A and B we would get TRUE. Because one is positive and the other is negative. If it were replaced (a negative value in for this string, but a positive value in B), it would also return TRUE.

However, if one of the two compared values ​​is between 1 and -1, the comparison will not happen, and it will simply return NA. Here is what the final result for this example looks like:

          A          B         C    AB    BC    AC
1 -1.721213 -4.3226204 -1.500625 FALSE FALSE FALSE
2  2.588465  2.3871486  2.554616 FALSE FALSE FALSE
3 -3.860636  0.8353505 -1.829158    NA    NA FALSE
4  1.907551 -2.8422860  3.658128  TRUE  TRUE FALSE
5  0.164024 -2.4387760  2.641607    NA  TRUE    NA

, :

if((dat$A > 1 & datB < -1) | (dat$A < -1 & dat$B > -1) == TRUE)...

, .


:

, , . mpalanco - . , DMC, - , .

+4
4

, , , {A, B, C}, :

library(tidyr)
library(dplyr)

# get original names
orig_names <- names(dat)

# add a row indicator
dat <- dat %>% mutate(k = row_number())

dat2 <- dat %>%
  # reshape to long
  gather(letter, value, A:C) %>%
  # change value to {-1, 1}
  mutate(
    value = ifelse(value <= -1, -1, ifelse(value >= 1, 1, NA)),
    letter = as.character(letter)
    )

# create a placeholder data frame for result  
d_new <- expand.grid(
  V1 = orig_names, 
  V2 = orig_names, 
  k = 1:nrow(dat),
  stringsAsFactors = FALSE
  ) %>%
  filter(V1 < V2)

# compute result
result <- d_new %>% 
  left_join(dat2, by = c("V1" = "letter", "k" = "k")) %>%
  left_join(dat2, by = c("V2" = "letter", "k" = "k")) %>%
  mutate(
    result = (value.x != value.y),
    combo = paste0(V1, V2)
    ) %>%
  select(-starts_with("value"), -V1, -V2) %>%
  spread(combo, result)

# join with original data
dat %>% left_join(result)

##           A          B         C k    AB    AC    BC
## 1 -1.721213 -4.3226204 -1.500625 1 FALSE FALSE FALSE
## 2  2.588465  2.3871486  2.554616 2 FALSE FALSE FALSE
## 3 -3.860636  0.8353505 -1.829158 3    NA FALSE    NA
## 4  1.907551 -2.8422860  3.658128 4  TRUE FALSE  TRUE
## 5  0.164024 -2.4387760  2.641607 5    NA    NA  TRUE
+2

:

is_between <- function(x, a, b) {
  x > a & x < b
}

makeCol <- function(col1, col2) {
  ifelse(
    is_between(col1, -1, 1) | is_between(col2, -1, 1),
    NA,
    !as.logical(sign(col1) + sign(col2))
  )
}

dat$AB <- makeCol(dat$A, dat$B)
dat$BC <- makeCol(dat$B, dat$C)
dat$AC <- makeCol(dat$A, dat$C)

, @akrun - :

combn(seq_along(dat), 2, function(x) { makeCol(dat[, x[1]], dat[, x[2]]) })
+2

That would be my decision.

dat[dat$A*dat$B<0 & abs(dat$A)>=1 & abs(dat$B)>=1,4]<-TRUE
dat[dat$A*dat$B>0 & abs(dat$A)>=1 & abs(dat$B)>=1,4]<-FALSE

dat[dat$C*dat$B<0 & abs(dat$C)>=1 & abs(dat$B)>=1,5]<-TRUE
dat[dat$C*dat$B>0 & abs(dat$C)>=1 & abs(dat$B)>=1,5]<-FALSE

dat[dat$A*dat$C<0 & abs(dat$A)>=1 & abs(dat$C)>=1,6]<-TRUE
dat[dat$A*dat$C>0 & abs(dat$A)>=1 & abs(dat$C)>=1,6]<-FALSE

colnames(dat)<-c("A","B","C","AB","BC","AC")
+1
source

A very simple step-by-step approach:

# To keep original data
dat2 <- dat

# Assign NA to values between -1 and -1
dat[ifelse(dat >-1 & dat < 1, TRUE, FALSE)] <- NA

# Create three new columns   
dat[4:6] <- data.frame(AB = dat$A*dat$B, BC = dat$B*dat$C, AC = dat$A*dat$C)

# If the are positive will be FALSE
dat[4:6] <- ifelse(dat[, 4:6] > 0, FALSE, TRUE)

# Final result
 cbind(dat2[, 1:3], dat[, 4:6])

Output:

          A          B         C    AB    BC    AC
1 -1.721213 -4.3226204 -1.500625 FALSE FALSE FALSE
2  2.588465  2.3871486  2.554616 FALSE FALSE FALSE
3 -3.860636  0.8353505 -1.829158    NA    NA FALSE
4  1.907551 -2.8422860  3.658128  TRUE  TRUE FALSE
5  0.164024 -2.4387760  2.641607    NA  TRUE    NA
+1
source

Source: https://habr.com/ru/post/1609837/


All Articles