Insert characters into strings in R

I would like to insert " & " between adjacent letters (upper and lower case), but not before the first or after the last letter of a term, replace each lowercase letter x with tt$X==0, each uppercase letter X with tt$X==1, and each + with ) | (, and put an opening bracket and a closing bracket around the entire line to get an expression that can be evaluated in R. For example, I have a string

# Example input: terms are runs of letters, joined by " + ".
st <- "AbC + de + FGHIJ"

The result should look like this:

"(tt$A==1 & tt$B==0 & tt$C==1) | (tt$D==0 & tt$E==0) | (tt$F==1 & tt$G==1 & tt$H==1 & tt$I==1 & tt$J==1)"

Can I easily do this with a function gsub()?

+4
source share
2 answers

. regexp- , .

> tt("aBc+b")
[1] "(tt$A==0 & tt$B==1 & tt$C==0+tt$B==0)"
> tt("aBc + b")
[1] "(tt$A==0 & tt$B==1 & tt$C==0) | (tt$B==0)"

. :

#' Translate single letters into comparisons on an object's columns.
#'
#' @param c Character vector of single letters.
#' @param object_name Name of the object whose columns are referenced in the
#'   generated text. Defaults to "tt".
#' @return Character vector with one element per letter, named by the input
#'   letters. An upper-case letter X gives "<object_name>$X==1"; anything else
#'   gives "<object_name>$X==0" with the character upper-cased.
doChar <- function(c, object_name = "tt") {
  # vapply keeps the result a character vector even for empty input.
  vapply(
    c,
    function(ch) {
      flag <- if (ch %in% LETTERS) "1" else "0"
      paste0(object_name, "$", toupper(ch), "==", flag)
    },
    character(1)
  )
}

#' Turn each word into a bracketed conjunction of its letters' comparisons.
#'
#' @param W Character vector of words (runs of letters).
#' @param object_name Name of the object whose columns are referenced.
#'   Defaults to "tt".
#' @return Character vector with one element per word, named by the input
#'   words, of the form "(<cmp> & <cmp> & ...)".
doWord <- function(W, object_name = "tt") {
  vapply(
    W,
    function(word) {
      chars <- strsplit(word, "")[[1]]
      conjunction <- paste(doChar(chars, object_name), collapse = " & ")
      paste0("(", conjunction, ")")
    },
    character(1)
  )
}

#' Convert a string such as "AbC + de" into logical-expression text.
#'
#' Terms are separated by "+"; whitespace is ignored. Each term becomes a
#' bracketed conjunction and the terms are joined with " | ".
#'
#' @param st A single string.
#' @param object_name Name of the object whose columns are referenced.
#'   Defaults to "tt".
#' @return A single string, e.g. "(tt$A==1 & tt$B==0) | (tt$C==0)".
processString <- function(st, object_name = "tt") {
  parts <- strsplit(st, "+", fixed = TRUE)[[1]]
  # Strip every kind of whitespace, not only the literal space.
  parts <- gsub("[[:space:]]", "", parts)
  # Skip empty terms (e.g. from "a++b" or a leading "+"); they would
  # otherwise produce an unparseable "()".
  parts <- parts[nzchar(parts)]
  paste0(doWord(parts, object_name), collapse = " | ")
}

, , ( ) :)

, tt, regexp:

> tt(st)==processString(st)
[1] TRUE

:

> processString("aBc + deF") == processString("aBc+deF")
[1] TRUE

, , . , tt , , foo$A tt$A, . regexp (, , , !).

0

,

# Input: terms are runs of letters separated by " + ".
st <- "AbC + de + FGHIJ"

# Step 1: every lowercase letter becomes tt$X==0, with the captured letter
# upper-cased by the replacement.
t1 <- gsub(
  pattern = "([a-z])",
  replacement = "tt\\$\\U\\1==0",
  x = st,
  perl = TRUE
)

# Step 2: every uppercase letter not preceded by "$" becomes tt$X==1; the
# lookbehind skips the letters already written by step 1.
t2 <- gsub(
  pattern = "((?<!\\$)[A-Z])",
  replacement = "tt\\$\\U\\1==1",
  x = t1,
  perl = TRUE
)

# Step 3: a digit directly followed by "tt" marks two adjacent terms; put
# " & " between them.
t3 <- gsub(pattern = "([0-9])(tt)", replacement = "\\1 & \\2", x = t2)

# Step 4: the literal " + " separator becomes ") | (".
t4 <- gsub(pattern = " + ", replacement = ") | (", x = t3, fixed = TRUE)

# Step 5: wrap the whole expression in brackets.
t5 <- paste0("(", t4, ")")

st
# "AbC + de + FGHIJ"
t5
# "(tt$A==1 & tt$B==0 & tt$C==1) | (tt$D==0 & tt$E==0) | (tt$F==1 & tt$G==1 & tt$H==1 & tt$I==1 & tt$J==1)"

, :

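t1 replaces every lowercase letter with tt$X==0, where X is the upper-case form of that letter. This is done with \\U\\1 in the replacement: \\U converts what follows to upper case, and \\1 refers to the captured letter. Case conversion in the replacement is a Perl-style feature, which is why perl = TRUE is needed.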

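t2 does the same for the upper-case letters, replacing each with tt$X==1. It must not touch the upper-case letters that t1 has just inserted (the lowercase tt prefix is not matched by [A-Z]); those are exactly the letters preceded by $. gsub has no option to exclude them, so the pattern uses a negative lookbehind, (?<!\\$), which matches only when the preceding character is not a literal $. Lookbehind also requires perl = TRUE.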

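t3 inserts the " & " separators. After t1 and t2, wherever two letters were adjacent, a term ending in a digit (0 or 1) is immediately followed by the tt$ of the next term. The pattern captures the digit and "tt" as two groups, and the replacement puts " & " between them.

t4 handles the "+": each " + " is replaced by ") | (". With fixed = TRUE the pattern is matched as a literal string, so the + is not treated as a regex quantifier, and the | in the replacement is simply R's OR operator in the resulting expression.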

, , .

, , : a) b) . , t4 , :

# Variant of t4: the space on either side of "+" is optional.
t4 <- gsub(pattern = " ?\\+ ?", replacement = ") | (", x = t3)

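Here the space on each side of the + is optional (? means 0 or 1 occurrences), and the + has to be escaped because fixed = TRUE is no longer used. With fixed = TRUE the pattern would be matched literally, so the optional spaces could not be expressed.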

, , .

#' Convert strings such as "AbC + de" into logical-expression text.
#'
#' Each lowercase letter x becomes "<object_name>$X==0", each uppercase
#' letter X becomes "<object_name>$X==1", adjacent letters are joined with
#' " & ", each "+" (with or without a space on either side) becomes ") | (",
#' and the result is wrapped in brackets.
#'
#' @param string Character vector of input strings.
#' @param object_name Name of the object whose columns are referenced. It is
#'   inserted literally, so it may contain letters of either case.
#' @return Character vector of the same length as `string`.
parse_string <- function(string, object_name) {
  # Separators first, so that only the term letters remain to be rewritten.
  expr <- gsub(" ?\\+ ?", ") | (", string)

  # " & " after every letter that is directly followed by another letter.
  expr <- gsub("([A-Za-z])(?=[A-Za-z])", "\\1 & ", expr, perl = TRUE)

  # Rewrite every letter in a single pass. Inserted text is never rescanned,
  # so letters inside object_name cannot be mistaken for term letters.
  letter_pos <- gregexpr("[A-Za-z]", expr, perl = TRUE)
  regmatches(expr, letter_pos) <- lapply(
    regmatches(expr, letter_pos),
    function(ch) {
      # paste0() would recycle a zero-length match set to length one.
      if (length(ch) == 0L) {
        return(character(0))
      }
      paste0(
        object_name, "$", toupper(ch), "==",
        ifelse(ch %in% LETTERS, "1", "0")
      )
    }
  )

  paste0("(", expr, ")")
}

> parse_string(st, "tt") == t5
# [1] TRUE
> parse_string(st, "foo")
# [1] "(foo$A==1 & foo$B==0 & foo$C==1) | (foo$D==0 & foo$E==0) | (foo$F==1 & foo$G==1 & foo$H==1 & foo$I==1 & foo$J==1)"
> parse_string("AbC+de+FGHIJ", "tt") == t5
# [1] TRUE
+3

Source: https://habr.com/ru/post/1622979/


All Articles