Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 201839

Condense multiple observations into distinct events based on time threshold

$
0
0

This is a subset of the data I'm working with:

structure(list(user_login = structure(c(7L, 7L, 7L, 7L, 4L, 4L, 
4L, 4L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 6L, 
6L, 6L, 3L), .Label = c("charles_cornellbirds", "Rachael_cornellbirds", 
"USER1125013", "USER399555", "USER413265", "USER602873", "USER947968"
), class = "factor"), time_video = c(96, 135, 209, 211, 101, 
140, 215, 216, 95, 136, 208, 209, 105, 146, 233, 234, 282, 93, 
134, 209, 209, 136, 209, 210, 101), dummy_value = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1)), row.names = c(8L, 15L, 21L, 22L, 34L, 42L, 51L, 52L, 69L, 
84L, 97L, 98L, 121L, 131L, 139L, 141L, 148L, 160L, 174L, 188L, 
189L, 229L, 242L, 243L, 267L), class = "data.frame")

If you take this data and graph it, it looks like this: [Bar chart displayed in the question]. Each bar marks the moment a user logged the arrival of a bird at the feeder. The red bars show what really happened: 4 birds arrived.

I'd like to be able to lump the other users' observations together so that they yield the same information the one "expert" recorded.

I've been able to lump together multiple observations using a time threshold, as suggested by Jon Spring in this answer to a previous post.

Here is the full dataset and the code I'd like to modify. As written, it produces only 2 lumped events, when I know there are 4 "real" events:

subset <- structure(list(id = c(1602L, 1549L, 1487L, 1513L, 1753L, 1712L, 
1616L, 1494L, 1564L, 1672L, 1522L, 1761L, 1722L, 1577L, 1500L, 
1578L, 1631L, 1632L, 1686L, 1687L, 1501L, 1767L, 1531L, 1768L, 
1532L, 1730L, 1732L, 1774L, 1740L), user_login = structure(c(2L, 
1L, 7L, 4L, 3L, 5L, 2L, 7L, 1L, 6L, 4L, 3L, 5L, 1L, 7L, 1L, 2L, 
2L, 6L, 6L, 7L, 3L, 4L, 3L, 4L, 5L, 5L, 3L, 5L), .Label = c("charles_cornellbirds", 
"Rachael_cornellbirds", "USER1125013", "USER399555", "USER413265", 
"USER602873", "USER947968"), class = "factor"), user_email = structure(c(7L, 
2L, 6L, 5L, 4L, 3L, 7L, 6L, 2L, 1L, 5L, 4L, 3L, 2L, 6L, 2L, 7L, 
7L, 1L, 1L, 6L, 4L, 5L, 4L, 5L, 3L, 3L, 4L, 3L), .Label = c("bwalter5@huskers.unl.edu", 
"charles@cornell.edu", "chooz2cruz@gmail.com", "hollybczzi@aol.com", 
"lisaberry@cinci.rr.com", "mercuryscaviar@aol.com", "rpm237@cornell.edu"
), class = "factor"), time_in_utc = structure(c(118L, 68L, 8L, 
33L, 255L, 220L, 131L, 15L, 80L, 182L, 41L, 262L, 228L, 93L, 
20L, 94L, 145L, 145L, 195L, 196L, 21L, 268L, 50L, 269L, 51L, 
233L, 234L, 275L, 242L), .Label = c("10/29/2019 17:35:21", "10/29/2019 18:01:49", 
"10/29/2019 18:01:53", "10/29/2019 18:02:01", "10/29/2019 18:02:14", 
"10/29/2019 18:03:09", "10/29/2019 18:03:18", "10/29/2019 18:03:25", 
"10/29/2019 18:03:30", "10/29/2019 18:03:32", "10/29/2019 18:03:38", 
"10/29/2019 18:03:41", "10/29/2019 18:04:00", "10/29/2019 18:04:03", 
"10/29/2019 18:04:04", "10/29/2019 18:04:32", "10/29/2019 18:04:33", 
"10/29/2019 18:04:50", "10/29/2019 18:05:17", "10/29/2019 18:05:18", 
"10/29/2019 18:05:20", "10/29/2019 18:05:38", "10/29/2019 18:05:39", 
"10/29/2019 18:05:45", "10/29/2019 18:54:24", "10/29/2019 18:54:30", 
"10/29/2019 18:54:39", "10/29/2019 18:54:45", "10/29/2019 18:55:01", 
"10/29/2019 18:55:29", "10/29/2019 18:55:55", "10/29/2019 18:56:04", 
"10/29/2019 18:56:11", "10/29/2019 18:56:12", "10/29/2019 18:56:18", 
"10/29/2019 18:56:23", "10/29/2019 18:56:26", "10/29/2019 18:56:39", 
"10/29/2019 18:56:45", "10/29/2019 18:56:47", "10/29/2019 18:56:50", 
"10/29/2019 18:56:55", "10/29/2019 18:56:59", "10/29/2019 18:57:10", 
"10/29/2019 18:57:12", "10/29/2019 18:57:14", "10/29/2019 18:57:23", 
"10/29/2019 18:57:36", "10/29/2019 18:57:37", "10/29/2019 18:58:05", 
"10/29/2019 18:58:06", "10/29/2019 18:58:19", "10/29/2019 18:58:25", 
"10/29/2019 18:58:37", "10/29/2019 18:58:39", "10/29/2019 18:58:52", 
"10/29/2019 18:58:54", "10/29/2019 18:59:17", "10/29/2019 18:59:41", 
"10/30/2019 13:43:32", "10/30/2019 13:45:16", "10/30/2019 13:45:20", 
"10/30/2019 13:45:24", "10/30/2019 13:45:41", "10/30/2019 13:46:10", 
"10/30/2019 13:46:35", "10/30/2019 13:46:44", "10/30/2019 13:46:51", 
"10/30/2019 13:46:52", "10/30/2019 13:46:53", "10/30/2019 13:46:58", 
"10/30/2019 13:47:03", "10/30/2019 13:47:06", "10/30/2019 13:47:08", 
"10/30/2019 13:47:19", "10/30/2019 13:47:22", "10/30/2019 13:47:25", 
"10/30/2019 13:47:26", "10/30/2019 13:47:31", "10/30/2019 13:47:32", 
"10/30/2019 13:47:34", "10/30/2019 13:47:35", "10/30/2019 13:47:39", 
"10/30/2019 13:47:41", "10/30/2019 13:47:48", "10/30/2019 13:47:49", 
"10/30/2019 13:47:52", "10/30/2019 13:47:53", "10/30/2019 13:48:04", 
"10/30/2019 13:48:15", "10/30/2019 13:48:16", "10/30/2019 13:48:18", 
"10/30/2019 13:48:44", "10/30/2019 13:48:45", "10/30/2019 13:48:46", 
"10/30/2019 13:48:49", "10/30/2019 13:48:59", "10/30/2019 13:49:06", 
"10/30/2019 13:49:11", "10/30/2019 13:49:17", "10/30/2019 13:49:20", 
"10/30/2019 13:49:30", "10/30/2019 13:49:33", "10/30/2019 13:49:35", 
"10/30/2019 13:49:37", "10/30/2019 13:49:44", "10/30/2019 13:50:36", 
"10/31/2019 13:25:44", "10/31/2019 15:45:07", "10/31/2019 15:45:32", 
"10/31/2019 15:46:52", "10/31/2019 15:46:55", "10/31/2019 15:47:00", 
"10/31/2019 15:47:17", "10/31/2019 15:47:44", "10/31/2019 15:48:12", 
"10/31/2019 15:48:18", "10/31/2019 15:48:25", "10/31/2019 15:48:27", 
"10/31/2019 15:48:28", "10/31/2019 15:48:32", "10/31/2019 15:48:34", 
"10/31/2019 15:48:37", "10/31/2019 15:48:39", "10/31/2019 15:48:40", 
"10/31/2019 15:48:41", "10/31/2019 15:48:55", "10/31/2019 15:48:56", 
"10/31/2019 15:48:59", "10/31/2019 15:49:01", "10/31/2019 15:49:06", 
"10/31/2019 15:49:07", "10/31/2019 15:49:10", "10/31/2019 15:49:15", 
"10/31/2019 15:49:17", "10/31/2019 15:49:23", "10/31/2019 15:49:24", 
"10/31/2019 15:49:25", "10/31/2019 15:49:28", "10/31/2019 15:49:39", 
"10/31/2019 15:49:51", "10/31/2019 15:49:53", "10/31/2019 15:49:59", 
"10/31/2019 15:50:20", "10/31/2019 15:50:21", "10/31/2019 15:50:23", 
"10/31/2019 15:50:24", "10/31/2019 15:50:34", "10/31/2019 15:50:41", 
"10/31/2019 15:50:47", "10/31/2019 15:50:53", "10/31/2019 15:50:55", 
"10/31/2019 15:51:07", "10/31/2019 15:51:09", "10/31/2019 15:51:10", 
"10/31/2019 15:51:18", "10/31/2019 15:51:19", "10/31/2019 15:51:31", 
"10/31/2019 15:52:00", "10/31/2019 17:02:57", "10/31/2019 17:03:10", 
"10/31/2019 17:03:15", "10/31/2019 17:03:19", "10/31/2019 17:03:35", 
"10/31/2019 17:04:03", "10/31/2019 17:04:29", "10/31/2019 17:04:38", 
"10/31/2019 17:04:46", "10/31/2019 17:04:49", "10/31/2019 17:04:51", 
"10/31/2019 17:04:53", "10/31/2019 17:04:55", "10/31/2019 17:04:58", 
"10/31/2019 17:05:00", "10/31/2019 17:05:01", "10/31/2019 17:05:02", 
"10/31/2019 17:05:14", "10/31/2019 17:05:17", "10/31/2019 17:05:20", 
"10/31/2019 17:05:21", "10/31/2019 17:05:25", "10/31/2019 17:05:26", 
"10/31/2019 17:05:29", "10/31/2019 17:05:33", "10/31/2019 17:05:34", 
"10/31/2019 17:05:35", "10/31/2019 17:05:41", "10/31/2019 17:05:44", 
"10/31/2019 17:05:47", "10/31/2019 17:05:58", "10/31/2019 17:06:00", 
"10/31/2019 17:06:09", "10/31/2019 17:06:11", "10/31/2019 17:06:13", 
"10/31/2019 17:06:39", "10/31/2019 17:06:40", "10/31/2019 17:06:41", 
"10/31/2019 17:06:42", "10/31/2019 17:06:52", "10/31/2019 17:07:00", 
"10/31/2019 17:07:06", "10/31/2019 17:07:07", "10/31/2019 17:07:12", 
"10/31/2019 17:07:14", "10/31/2019 17:07:24", "10/31/2019 17:07:25", 
"10/31/2019 17:07:30", "10/31/2019 17:07:37", "10/31/2019 17:07:39", 
"10/31/2019 17:07:51", "10/31/2019 17:08:19", "10/31/2019 20:52:14", 
"11/01/2019 17:37:53", "11/01/2019 17:37:56", "11/01/2019 17:38:14", 
"11/01/2019 17:38:17", "11/01/2019 17:38:34", "11/01/2019 17:39:26", 
"11/01/2019 17:39:34", "11/01/2019 17:39:41", "11/01/2019 17:39:46", 
"11/01/2019 17:39:47", "11/01/2019 17:39:49", "11/01/2019 17:39:54", 
"11/01/2019 17:40:04", "11/01/2019 17:40:12", "11/01/2019 17:40:16", 
"11/01/2019 17:40:22", "11/01/2019 17:40:29", "11/01/2019 17:40:53", 
"11/01/2019 17:41:01", "11/01/2019 17:41:25", "11/01/2019 17:41:49", 
"11/01/2019 17:41:50", "11/01/2019 17:42:03", "11/01/2019 17:42:10", 
"11/01/2019 17:42:16", "11/01/2019 17:42:17", "11/01/2019 17:42:19", 
"11/01/2019 17:42:26", "11/01/2019 17:42:32", "11/01/2019 17:42:38", 
"11/01/2019 17:42:45", "11/01/2019 17:43:27", "11/01/2019 23:01:41", 
"11/01/2019 23:01:51", "11/01/2019 23:02:02", "11/01/2019 23:02:05", 
"11/01/2019 23:02:16", "11/01/2019 23:02:23", "11/01/2019 23:02:25", 
"11/01/2019 23:02:48", "11/01/2019 23:03:19", "11/01/2019 23:03:25", 
"11/01/2019 23:03:32", "11/01/2019 23:03:34", "11/01/2019 23:03:35", 
"11/01/2019 23:03:40", "11/01/2019 23:03:45", "11/01/2019 23:03:49", 
"11/01/2019 23:03:50", "11/01/2019 23:04:12", "11/01/2019 23:04:17", 
"11/01/2019 23:04:20", "11/01/2019 23:04:36", "11/01/2019 23:04:45", 
"11/01/2019 23:04:59", "11/01/2019 23:05:24", "11/01/2019 23:05:26", 
"11/01/2019 23:05:27", "11/01/2019 23:05:40", "11/01/2019 23:05:47", 
"11/01/2019 23:05:57", "11/01/2019 23:05:59", "11/01/2019 23:06:15", 
"11/01/2019 23:06:16", "11/01/2019 23:06:26", "11/01/2019 23:06:38", 
"11/01/2019 23:07:04"), class = "factor"), time = structure(c(59L, 
10L, 192L, 217L, 255L, 161L, 72L, 199L, 22L, 123L, 225L, 262L, 
169L, 35L, 204L, 36L, 86L, 86L, 136L, 137L, 205L, 268L, 234L, 
269L, 235L, 174L, 175L, 275L, 183L), .Label = c("13:25:44", "13:43:32", 
"13:45:16", "13:45:20", "13:45:24", "13:45:41", "13:46:10", "13:46:35", 
"13:46:44", "13:46:51", "13:46:52", "13:46:53", "13:46:58", "13:47:03", 
"13:47:06", "13:47:08", "13:47:19", "13:47:22", "13:47:25", "13:47:26", 
"13:47:31", "13:47:32", "13:47:34", "13:47:35", "13:47:39", "13:47:41", 
"13:47:48", "13:47:49", "13:47:52", "13:47:53", "13:48:04", "13:48:15", 
"13:48:16", "13:48:18", "13:48:44", "13:48:45", "13:48:46", "13:48:49", 
"13:48:59", "13:49:06", "13:49:11", "13:49:17", "13:49:20", "13:49:30", 
"13:49:33", "13:49:35", "13:49:37", "13:49:44", "13:50:36", "15:45:07", 
"15:45:32", "15:46:52", "15:46:55", "15:47:00", "15:47:17", "15:47:44", 
"15:48:12", "15:48:18", "15:48:25", "15:48:27", "15:48:28", "15:48:32", 
"15:48:34", "15:48:37", "15:48:39", "15:48:40", "15:48:41", "15:48:55", 
"15:48:56", "15:48:59", "15:49:01", "15:49:06", "15:49:07", "15:49:10", 
"15:49:15", "15:49:17", "15:49:23", "15:49:24", "15:49:25", "15:49:28", 
"15:49:39", "15:49:51", "15:49:53", "15:49:59", "15:50:20", "15:50:21", 
"15:50:23", "15:50:24", "15:50:34", "15:50:41", "15:50:47", "15:50:53", 
"15:50:55", "15:51:07", "15:51:09", "15:51:10", "15:51:18", "15:51:19", 
"15:51:31", "15:52:00", "17:02:57", "17:03:10", "17:03:15", "17:03:19", 
"17:03:35", "17:04:03", "17:04:29", "17:04:38", "17:04:46", "17:04:49", 
"17:04:51", "17:04:53", "17:04:55", "17:04:58", "17:05:00", "17:05:01", 
"17:05:02", "17:05:14", "17:05:17", "17:05:20", "17:05:21", "17:05:25", 
"17:05:26", "17:05:29", "17:05:33", "17:05:34", "17:05:35", "17:05:41", 
"17:05:44", "17:05:47", "17:05:58", "17:06:00", "17:06:09", "17:06:11", 
"17:06:13", "17:06:39", "17:06:40", "17:06:41", "17:06:42", "17:06:52", 
"17:07:00", "17:07:06", "17:07:07", "17:07:12", "17:07:14", "17:07:24", 
"17:07:25", "17:07:30", "17:07:37", "17:07:39", "17:07:51", "17:08:19", 
"17:35:21", "17:37:53", "17:37:56", "17:38:14", "17:38:17", "17:38:34", 
"17:39:26", "17:39:34", "17:39:41", "17:39:46", "17:39:47", "17:39:49", 
"17:39:54", "17:40:04", "17:40:12", "17:40:16", "17:40:22", "17:40:29", 
"17:40:53", "17:41:01", "17:41:25", "17:41:49", "17:41:50", "17:42:03", 
"17:42:10", "17:42:16", "17:42:17", "17:42:19", "17:42:26", "17:42:32", 
"17:42:38", "17:42:45", "17:43:27", "18:01:49", "18:01:53", "18:02:01", 
"18:02:14", "18:03:09", "18:03:18", "18:03:25", "18:03:30", "18:03:32", 
"18:03:38", "18:03:41", "18:04:00", "18:04:03", "18:04:04", "18:04:32", 
"18:04:33", "18:04:50", "18:05:17", "18:05:18", "18:05:20", "18:05:38", 
"18:05:39", "18:05:45", "18:54:24", "18:54:30", "18:54:39", "18:54:45", 
"18:55:01", "18:55:29", "18:55:55", "18:56:04", "18:56:11", "18:56:12", 
"18:56:18", "18:56:23", "18:56:26", "18:56:39", "18:56:45", "18:56:47", 
"18:56:50", "18:56:55", "18:56:59", "18:57:10", "18:57:12", "18:57:14", 
"18:57:23", "18:57:36", "18:57:37", "18:58:05", "18:58:06", "18:58:19", 
"18:58:25", "18:58:37", "18:58:39", "18:58:52", "18:58:54", "18:59:17", 
"18:59:41", "20:52:14", "23:01:41", "23:01:51", "23:02:02", "23:02:05", 
"23:02:16", "23:02:23", "23:02:25", "23:02:48", "23:03:19", "23:03:25", 
"23:03:32", "23:03:34", "23:03:35", "23:03:40", "23:03:45", "23:03:49", 
"23:03:50", "23:04:12", "23:04:17", "23:04:20", "23:04:36", "23:04:45", 
"23:04:59", "23:05:24", "23:05:26", "23:05:27", "23:05:40", "23:05:47", 
"23:05:57", "23:05:59", "23:06:15", "23:06:16", "23:06:26", "23:06:38", 
"23:07:04"), class = "factor"), study_name = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Collecting Data At The Panama Fruit Feeder", class = "factor"), 
    observation_name = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("activity", "session_status", 
    "species"), class = "factor"), observation_value = structure(c(6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("Aggressive behavior", 
    "Chestnut-headed Oropendola", "Clay-colored Thrush", "Gray-cowled Wood-Rail", 
    "Gray-headed Chachalaca", "Rufous Motmot", "start", "stop"
    ), class = "factor"), delete_flag = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), time_video = c(93, 95, 96, 101, 101, 105, 134, 
    135, 136, 136, 140, 141, 146, 208, 209, 209, 209, 209, 209, 
    210, 211, 213, 215, 215, 216, 233, 234, 264, 282), date_time = structure(c(1572551305, 
    1572457611, 1572386605, 1572389771, 1572663812, 1572644381, 
    1572551346, 1572386644, 1572457652, 1572555926, 1572389810, 
    1572663852, 1572644422, 1572457724, 1572386718, 1572457725, 
    1572551421, 1572551421, 1572555999, 1572556000, 1572386720, 
    1572663924, 1572389885, 1572663926, 1572389886, 1572644509, 
    1572644510, 1572663975, 1572644558), class = c("POSIXct", 
    "POSIXt"), tzone = ""), date = structure(c(18200, 18199, 
    18198, 18198, 18202, 18201, 18200, 18198, 18199, 18200, 18198, 
    18202, 18201, 18199, 18198, 18199, 18200, 18200, 18200, 18200, 
    18198, 18202, 18198, 18202, 18198, 18201, 18201, 18202, 18201
    ), class = "Date"), dummy_value = c(1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1)), row.names = c(160L, 69L, 8L, 34L, 267L, 121L, 
174L, 15L, 84L, 229L, 42L, 275L, 131L, 97L, 21L, 98L, 188L, 189L, 
242L, 243L, 22L, 281L, 51L, 282L, 52L, 139L, 141L, 288L, 148L
), class = "data.frame")
# Order the observations by their offset into the video (`time_video`).
# The data frame above is assigned to `subset`, so it must be the source here;
# reading from `subset2` before it exists fails with "object not found".
subset2 <- subset[order(subset$time_video), ]

# Minimum gap in `time_video` units that starts a new event (the plot code
# labels this as ">60 secs"). Observations separated by no more than this are
# lumped into the same event. With the data in the question, 60 produces only
# 2 events, so this is the value to tune.
gap_threshold <- 60

# Sort by video offset only; the date is ignored because, for now, every user
# watches the same canned video.
v <- subset2 %>%
  dplyr::arrange(time_video)

# Collapse the sorted observations into events:
#   1. time_since_last: gap to the previous observation (0 for the first row,
#      because the default for the lag is the first value itself).
#   2. group: running event id, incremented whenever the gap exceeds the
#      threshold.
#   3. one summary row per event: first/last video offset, number of
#      observations, and the event's duration (`time`).
# Every dplyr verb is namespaced; an unqualified `lag` would resolve to
# stats::lag when dplyr is not attached, which ignores `default` and does not
# shift the vector, so every gap would silently come out as 0.
v2 <- v %>%
  dplyr::mutate(
    time_since_last = time_video -
      dplyr::lag(time_video, default = dplyr::first(time_video)),
    group = 1 + cumsum(time_since_last > gap_threshold)
  ) %>%
  dplyr::group_by(group) %>%
  dplyr::summarize(
    first = min(time_video), # or first(date.time) if sorted
    last = max(time_video), # or last(date.time) if sorted
    count = dplyr::n()
  ) %>%
  dplyr::mutate(time = last - first)

# Draw each lumped event from `v2` as a rectangle spanning its first-to-last
# video offset, with height equal to the number of observations it contains,
# and print that count just above the rectangle's left edge.
ggplot(data = v2) +
  geom_rect(
    aes(xmin = first, xmax = last, ymin = 0, ymax = count),
    fill = "steelblue",
    color = "steelblue",
    alpha = 0.8
  ) + # >60 secs
  geom_text(
    aes(x = first, y = count, label = count),
    color = "steelblue",
    size = 5,
    hjust = 0.3,
    vjust = -0.4
  ) +
  theme_minimal() +
  labs(x = "Time", y = "Number of Observations") +
  theme(text = element_text(size = 20))

Viewing all articles
Browse latest Browse all 201839

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>