R/separate.R
separate.Rd
Given either a regular expression or a vector of character positions,
separate()
turns a single character column into multiple columns.
separate( data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, extra = "warn", fill = "warn", ... )
data  A data frame. 

col  Column name or position. This is passed to `tidyselect::vars_pull()`.
This argument is passed by expression and supports quasiquotation (you can unquote column names or column positions). 
into  Names of new variables to create as a character vector.
Use `NA` to omit the variable in the output.
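sep  Separator between columns. If character, `sep` is interpreted as a regular expression. The default value is a regular expression that matches any sequence of non-alphanumeric values. If numeric, `sep` is interpreted as character positions to split at. Positive values start at 1 at the far-left of the string; negative values start at -1 at the far-right of the string. The length of `sep` should be one less than the length of `into`.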
remove  If `TRUE`, remove the input column from the output data frame.
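convert  If `TRUE`, will run `type.convert()` with `as.is = TRUE` on the new columns. This is useful if the component columns are integer, numeric or logical. NB: this will cause string `"NA"`s to be converted to `NA`s.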
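extra  If `sep` is a character vector, this controls what happens when there are too many pieces. There are three valid options:
  - "warn" (the default): emit a warning and drop extra values.
  - "drop": drop any extra values without a warning.
  - "merge": only split at most `length(into)` times.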

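fill  If `sep` is a character vector, this controls what happens when there are not enough pieces. There are three valid options:
  - "warn" (the default): emit a warning and fill from the right.
  - "right": fill with missing values on the right.
  - "left": fill with missing values on the left.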

...  Additional arguments passed on to methods. 
library(dplyr)

# If you want to split by any non-alphanumeric value (the default):
df <- data.frame(x = c(NA, "x.y", "x.z", "y.z"))
df %>% separate(x, c("A", "B"))
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z

# If you just want the second variable:
df %>% separate(x, c(NA, "B"))
#>      B
#> 1 <NA>
#> 2    y
#> 3    z
#> 4    z

# If every row doesn't split into the same number of pieces, use
# the extra and fill arguments to control what happens:
df <- data.frame(x = c("x", "x y", "x y z", NA))
df %>% separate(x, c("a", "b"))
#> Warning: Expected 2 pieces. Additional pieces discarded in 1 rows [3].
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
#>      a    b
#> 1    x <NA>
#> 2    x    y
#> 3    x    y
#> 4 <NA> <NA>

# The same behaviour as previous, but drops the c without warnings:
df %>% separate(x, c("a", "b"), extra = "drop", fill = "right")
#>      a    b
#> 1    x <NA>
#> 2    x    y
#> 3    x    y
#> 4 <NA> <NA>

# Opposite of previous, keeping the c and filling left:
df %>% separate(x, c("a", "b"), extra = "merge", fill = "left")
#>      a    b
#> 1 <NA>    x
#> 2    x    y
#> 3    x  y z
#> 4 <NA> <NA>

# Or you can keep all three:
df %>% separate(x, c("a", "b", "c"))
#> Warning: Expected 3 pieces. Missing pieces filled with `NA` in 2 rows [1, 2].
#>      a    b    c
#> 1    x <NA> <NA>
#> 2    x    y <NA>
#> 3    x    y    z
#> 4 <NA> <NA> <NA>

# To only split a specified number of times use extra = "merge":
df <- data.frame(x = c("x: 123", "y: error: 7"))
df %>% separate(x, c("key", "value"), ": ", extra = "merge")
#>   key    value
#> 1   x      123
#> 2   y error: 7

# Use regular expressions to separate on multiple characters:
df <- data.frame(x = c(NA, "x?y", "x.z", "y:z"))
df %>% separate(x, c("A","B"), sep = "([.?:])")
#>      A    B
#> 1 <NA> <NA>
#> 2    x    y
#> 3    x    z
#> 4    y    z

# convert = TRUE detects column classes:
df <- data.frame(x = c("x:1", "x:2", "y:4", "z", NA))
df %>% separate(x, c("key","value"), ":") %>% str
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [4].
#> 'data.frame': 5 obs. of 2 variables:
#>  $ key  : chr "x" "x" "y" "z" ...
#>  $ value: chr "1" "2" "4" NA ...
df %>% separate(x, c("key","value"), ":", convert = TRUE) %>% str
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [4].
#> 'data.frame': 5 obs. of 2 variables:
#>  $ key  : chr "x" "x" "y" "z" ...
#>  $ value: int 1 2 4 NA NA