regex - Extracting text strings using data.table in R -


I have a data.table similar to the one that follows.

data

library(data.table) dt <- structure(list(n = 1:6, vn = c("v1", "v3", "v6", "v7a", "v18",  "v23"), t1 = c("bigby (wolf)", "white", "red (rose)", "piggy (straw)",  "(curse) beast", "prince"), t2 = c("jack (bean)", "snow (dwarves)",  "beard (blue)", "bhageera (jungle) mowgli (book)", "beauty",  "glass (slipper)"), t3 = c("hk (34)", "vl (r45)", "tg (h5)",  "tt (hg) (45)", "gh", "vlp"), val = c(36, 25, 0.84, 12, 78, 258 )), .names = c("n", "vn", "t1", "t2", "t3", "val"), class = "data.frame", row.names = c(na,  -6l))  setdt(dt)  dt    n  vn            t1                              t2           t3    val 1: 1  v1  bigby (wolf)                     jack (bean)      hk (34)  36.00 2: 2  v3         white                  snow (dwarves)     vl (r45)  25.00 3: 3  v6    red (rose)                    beard (blue)      tg (h5)   0.84 4: 4 v7a piggy (straw) bhageera (jungle) mowgli (book) tt (hg) (45)  12.00 5: 5 v18 (curse) beast                          beauty           gh  78.00 6: 6 v23        prince                 glass (slipper)          vlp 258.00 

I want to extract the strings within parentheses from columns t1 and t2 into a new column c.

I can do this for single rows as follows.

rowwise calculations

setDF(dt) dtf <- c("t1", "t2") paste(unique(unlist(regmatches(dt[4,dtf], gregexpr("(?=\\().*?(?<=\\))", dt[4,dtf], perl=TRUE)))), collapse=" ") [1] "(straw) (jungle) (book)" paste(unique(unlist(regmatches(dt[3,dtf], gregexpr("(?=\\().*?(?<=\\))", dt[3,dtf], perl=TRUE)))), collapse=" ") [1] "(rose) (blue)" 

However, I am not able to get similar results using data.table.

try data.table

setDT(dt) dt[, c := paste(unique(unlist(regmatches(get(dtf), gregexpr("(?=\\().*?(?<=\\))", get(dtf), perl=TRUE)))), collapse=" ")] 

How can I use data.table to get the desired result?

desired result

out <- structure(list(n = 1:6, vn = c("v1", "v3", "v6", "v7a", "v18",  "v23"), t1 = c("bigby (wolf)", "white", "red (rose)", "piggy (straw)",  "(curse) beast", "prince"), t2 = c("jack (bean)", "snow (dwarves)",  "beard (blue)", "bhageera (jungle) mowgli (book)", "beauty",  "glass (slipper)"), t3 = c("hk (34)", "vl (r45)", "tg (h5)",  "tt (hg) (45)", "gh", "vlp"), val = c(36, 25, 0.84, 12, 78, 258 ), c = c("(wolf) (bean)", "(dwarves)", "(rose) (blue)", "(straw) (jungle) (book)",  "(curse)", "(slipper)")), .names = c("n", "vn", "t1", "t2", "t3",  "val", "c"), class = "data.frame", row.names = c(na, -6l)) out   n  vn            t1                              t2           t3    val                       c 1 1  v1  bigby (wolf)                     jack (bean)      hk (34)  36.00           (wolf) (bean) 2 2  v3         white                  snow (dwarves)     vl (r45)  25.00               (dwarves) 3 3  v6    red (rose)                    beard (blue)      tg (h5)   0.84           (rose) (blue) 4 4 v7a piggy (straw) bhageera (jungle) mowgli (book) tt (hg) (45)  12.00 (straw) (jungle) (book) 5 5 v18 (curse) beast                          beauty           gh  78.00                 (curse) 6 6 v23        prince                 glass (slipper)          vlp 258.00               (slipper) 

You can use by and .SDcols for this: group by the row identifier n so that .SD holds only the t1 and t2 values of a single row.

setDT(dt) dtf <- c("t1", "t2") dt[, c := paste(unique(unlist(regmatches(.SD, gregexpr("(?=\\().*?(?<=\\))", .SD, perl=TRUE)))),                  collapse=" "),     by = n,     .SDcols = dtf] dt ## n  vn            t1                              t2           t3    val                       c ## 1: 1  v1  bigby (wolf)                     jack (bean)      hk (34)  36.00           (wolf) (bean) ## 2: 2  v3         white                  snow (dwarves)     vl (r45)  25.00               (dwarves) ## 3: 3  v6    red (rose)                    beard (blue)      tg (h5)   0.84           (rose) (blue) ## 4: 4 v7a piggy (straw) bhageera (jungle) mowgli (book) tt (hg) (45)  12.00 (straw) (jungle) (book) ## 5: 5 v18 (curse) beast                          beauty           gh  78.00                 (curse) ## 6: 6 v23        prince                 glass (slipper)          vlp 258.00               (slipper) 

Comments

Popular posts from this blog

android - MPAndroidChart - How to add Annotations or images to the chart -

javascript - Add class to another page attribute using URL id - Jquery -

firefox - Where is 'webgl.osmesalib' parameter? -