Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 201839

Stacking multiple columns using pivot longer in R

$
0
0

I am trying to change my data from wide to long on r using pivot_longer. There appear to be a few people having similar issues on here but I have been unable to adapt their solutions to my data. I have attached a picture of example data in the wide data example and what I am trying to achieve in the long data example.

In summary I have a time and reference column which refer to all columns, I also have multiple columns of group, subject, ID, xcoordinate ycoordinate in the form of: group1, subject1. ID1, xcoord1 ycoord1, group2, subject2, ID2, xcoord2, ycoord2 and so on... What I want is a long table with columns:

time, reference, group, subject, ID, xcoord, ycoord.

With the 5 columns stacking their respective numbered columns, and the time and reference columns repeating for the relevant stacks.

 df %>%
 pivot_longer(cols = -c(time, reference),
names_to = c("group", "subject", "ID", "xcoord", "ycoord")

My understanding is that I require to use the names_pattern function, although I cant seem to get that to work, and I cant find anything clear describing How I should be using it. I should say my data is much wider than the example data, so can't really rely on numbering of columns.

appreciate any help

wide data example

    time reference group1 subject1 ID1 xcoord1 ycoord1 group2 subject2 ID2 xcoord2 ycoord2 group3 subject3 ID3 xcoord3 ycoord3
1  00:01   4097365      1        4   1    7.44   38.16      0       21   2   33.90   47.26      1       15   3   21.53    2.67
2  00:02   4097366      1        4   1    9.84   37.03      0       21   2   32.98   48.47      1       15   3   21.82    2.95
3  00:03   4097367      1        4   1   12.01   35.83      0       21   2   30.17   50.33      1       15   3   22.06    4.45
4  00:04   4097368      1        4   1   12.15   34.17      0       21   2   29.85   50.52      1       15   3   23.50    4.75
5  00:05   4097369      1        4   1   15.27   32.94      0       21   2   28.39   51.30      1       15   3   24.25    4.76
6  00:06   4097370      1        4   1   18.96   31.98      0       21   2   28.39   52.36      1       15   3   25.31    6.57
7  00:07   4097371      1        4   1   22.50   31.13      0       21   2   26.59   53.14      1       15   3   26.05    7.04
8  00:08   4097372      1        4   1   27.47   30.15      0       21   2   25.89   53.94      1       15   3   27.29    7.91
9  00:09   4097373      1        4   1   32.17   29.92      0       21   2   24.64   54.42      1       15   3   27.47    8.44
10 00:10   4097374      1        4   1   33.77   27.49      0       21   2   24.61   55.23      1       15   3   28.59    8.71

Long data example

   time reference group subject ID xcoord ycoord
1  00:01   4097365     1       4  1   7.44  38.16
2  00:01   4097365     0      21  2  33.90  47.26
3  00:01   4097365     1      15  3  21.53   2.67
4  00:02   4097366     1       4  1   9.84  37.03
5  00:02   4097367     0      21  2  32.98  48.47
6  00:02   4097368     1      15  3  21.82   2.95
7  00:03   4097369     1       4  1  12.01  35.83
8  00:03   4097370     0      21  2  30.17  50.33
9  00:03   4097371     1      15  3  22.06   4.45
10 00:04   4097372     1       4  1  12.15  34.17

edit: playing about a bit with the data I have managed to achieve this odd solution which is a mixture of long and wide data.

dput(head(df1))

structure(list(time = c(0, 0, 0, 0, 0, 0), state = structure(c(2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("Alive", "Alive;:", "Dead", "Dead;:"
), class = "factor"), reference = c("1880439", "1880439", "1880439", 
"1880439", "1880439", "1880439"), num = c("1", NA, "2", "3", 
"4", "5"), group = c("1", NA, "1", "4", "0", "0"), X = c(NA, 
NA, NA, NA, NA, NA), ID = c(1L, NA, 2L, 4L, 5L, 6L), subect = c(21L, 
NA, 7L, -1L, 2L, 6L), x = c(3514L, NA, 2807L, 5550L, 3956L, 3686L
), y = c(-1644L, NA, -510L, 4400L, 1297L, -55L), speed = c("5.23", 
NA, "3.24", "0.00", "2.31", "3.57"), group1 = c("0", NA, "4", 
"1", "1", "0"), ID1 = c(13L, NA, 14L, 15L, 16L, 17L), subect1 = c(9L, 
NA, -1L, 13L, 14L, 11L), x1 = c(882L, NA, 5550L, 3004L, 761L, 
3317L), y1 = c(-1468L, NA, 4400L, 1633L, 559L, 1443L), speed1 = c("1.70", 
NA, "0.00", "3.06", "2.92", "3.30"), group2 = c("4", NA, "0", 
"1", "0", "0"), ID2 = c(24L, NA, 25L, 26L, 27L, 28L), subect2 = c(-1L, 
NA, 1L, 18L, 5L, 10L), x2 = c(5550L, NA, 5031L, 3936L, 3972L, 
3623L), y2 = c(4400L, NA, -74L, 190L, 686L, 356L), speed2 = c("0.00", 
NA, "0.54", "1.06", "0.95", "2.49"), speed.group2 = c(NA, NA, 
NA, NA, NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-6L)).

the dataframe the code results in looks like this

> head(df1)
# A tibble: 6 x 24
   time state reference num   group X        ID subect     x     y speed group1   ID1 subect1    x1    y1 speed1 group2   ID2 subect2    x2    y2 speed2
  <dbl> <fct> <chr>     <chr> <chr> <lgl> <int>  <int> <int> <int> <chr> <chr>  <int>   <int> <int> <int> <chr>  <chr>  <int>   <int> <int> <int> <chr> 
1     0 Aliv~ 1880439   1     1     NA        1     21  3514 -1644 5.23  0         13       9   882 -1468 1.70   4         24      -1  5550  4400 0.00  
2     0 Aliv~ 1880439   NA    NA    NA       NA     NA    NA    NA NA    NA        NA      NA    NA    NA NA     NA        NA      NA    NA    NA NA    
3     0 Aliv~ 1880439   2     1     NA        2      7  2807  -510 3.24  4         14      -1  5550  4400 0.00   0         25       1  5031   -74 0.54  
4     0 Aliv~ 1880439   3     4     NA        4     -1  5550  4400 0.00  1         15      13  3004  1633 3.06   1         26      18  3936   190 1.06  
5     0 Aliv~ 1880439   4     0     NA        5      2  3956  1297 2.31  1         16      14   761   559 2.92   0         27       5  3972   686 0.95  
6     0 Aliv~ 1880439   5     0     NA        6      6  3686   -55 3.57  0         17      11  3317  1443 3.30   0         28      10  3623   356 2.49  
# ... with 1 more variable: speed.group2 <lgl>

Viewing all articles
Browse latest Browse all 201839

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>