This is a subset of the data I'm working with:
structure(list(user_login = structure(c(7L, 7L, 7L, 7L, 4L, 4L,
4L, 4L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 6L,
6L, 6L, 3L), .Label = c("charles_cornellbirds", "Rachael_cornellbirds",
"USER1125013", "USER399555", "USER413265", "USER602873", "USER947968"
), class = "factor"), time_video = c(96, 135, 209, 211, 101,
140, 215, 216, 95, 136, 208, 209, 105, 146, 233, 234, 282, 93,
134, 209, 209, 136, 209, 210, 101), dummy_value = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1)), row.names = c(8L, 15L, 21L, 22L, 34L, 42L, 51L, 52L, 69L,
84L, 97L, 98L, 121L, 131L, 139L, 141L, 148L, 160L, 174L, 188L,
189L, 229L, 242L, 243L, 267L), class = "data.frame")
If you graph this data, it looks like this: each bar marks the time at which a user logged the arrival of a bird at the feeder. The red is what really happened: 4 birds arrived.
I'd like to be able to lump the other users' observations together so that they yield the same information that the one "expert" recorded.
I've been able to lump together multiple observations using a time threshold, as suggested by Jon Spring in this answer to a previous post.
Here is the full dataset and the code I'd like to modify. As written, it produces only 2 groups of observations, when I know there are 4 "real" observations:
subset <- structure(list(id = c(1602L, 1549L, 1487L, 1513L, 1753L, 1712L,
1616L, 1494L, 1564L, 1672L, 1522L, 1761L, 1722L, 1577L, 1500L,
1578L, 1631L, 1632L, 1686L, 1687L, 1501L, 1767L, 1531L, 1768L,
1532L, 1730L, 1732L, 1774L, 1740L), user_login = structure(c(2L,
1L, 7L, 4L, 3L, 5L, 2L, 7L, 1L, 6L, 4L, 3L, 5L, 1L, 7L, 1L, 2L,
2L, 6L, 6L, 7L, 3L, 4L, 3L, 4L, 5L, 5L, 3L, 5L), .Label = c("charles_cornellbirds",
"Rachael_cornellbirds", "USER1125013", "USER399555", "USER413265",
"USER602873", "USER947968"), class = "factor"), user_email = structure(c(7L,
2L, 6L, 5L, 4L, 3L, 7L, 6L, 2L, 1L, 5L, 4L, 3L, 2L, 6L, 2L, 7L,
7L, 1L, 1L, 6L, 4L, 5L, 4L, 5L, 3L, 3L, 4L, 3L), .Label = c("bwalter5@huskers.unl.edu",
"charles@cornell.edu", "chooz2cruz@gmail.com", "hollybczzi@aol.com",
"lisaberry@cinci.rr.com", "mercuryscaviar@aol.com", "rpm237@cornell.edu"
), class = "factor"), time_in_utc = structure(c(118L, 68L, 8L,
33L, 255L, 220L, 131L, 15L, 80L, 182L, 41L, 262L, 228L, 93L,
20L, 94L, 145L, 145L, 195L, 196L, 21L, 268L, 50L, 269L, 51L,
233L, 234L, 275L, 242L), .Label = c("10/29/2019 17:35:21", "10/29/2019 18:01:49",
"10/29/2019 18:01:53", "10/29/2019 18:02:01", "10/29/2019 18:02:14",
"10/29/2019 18:03:09", "10/29/2019 18:03:18", "10/29/2019 18:03:25",
"10/29/2019 18:03:30", "10/29/2019 18:03:32", "10/29/2019 18:03:38",
"10/29/2019 18:03:41", "10/29/2019 18:04:00", "10/29/2019 18:04:03",
"10/29/2019 18:04:04", "10/29/2019 18:04:32", "10/29/2019 18:04:33",
"10/29/2019 18:04:50", "10/29/2019 18:05:17", "10/29/2019 18:05:18",
"10/29/2019 18:05:20", "10/29/2019 18:05:38", "10/29/2019 18:05:39",
"10/29/2019 18:05:45", "10/29/2019 18:54:24", "10/29/2019 18:54:30",
"10/29/2019 18:54:39", "10/29/2019 18:54:45", "10/29/2019 18:55:01",
"10/29/2019 18:55:29", "10/29/2019 18:55:55", "10/29/2019 18:56:04",
"10/29/2019 18:56:11", "10/29/2019 18:56:12", "10/29/2019 18:56:18",
"10/29/2019 18:56:23", "10/29/2019 18:56:26", "10/29/2019 18:56:39",
"10/29/2019 18:56:45", "10/29/2019 18:56:47", "10/29/2019 18:56:50",
"10/29/2019 18:56:55", "10/29/2019 18:56:59", "10/29/2019 18:57:10",
"10/29/2019 18:57:12", "10/29/2019 18:57:14", "10/29/2019 18:57:23",
"10/29/2019 18:57:36", "10/29/2019 18:57:37", "10/29/2019 18:58:05",
"10/29/2019 18:58:06", "10/29/2019 18:58:19", "10/29/2019 18:58:25",
"10/29/2019 18:58:37", "10/29/2019 18:58:39", "10/29/2019 18:58:52",
"10/29/2019 18:58:54", "10/29/2019 18:59:17", "10/29/2019 18:59:41",
"10/30/2019 13:43:32", "10/30/2019 13:45:16", "10/30/2019 13:45:20",
"10/30/2019 13:45:24", "10/30/2019 13:45:41", "10/30/2019 13:46:10",
"10/30/2019 13:46:35", "10/30/2019 13:46:44", "10/30/2019 13:46:51",
"10/30/2019 13:46:52", "10/30/2019 13:46:53", "10/30/2019 13:46:58",
"10/30/2019 13:47:03", "10/30/2019 13:47:06", "10/30/2019 13:47:08",
"10/30/2019 13:47:19", "10/30/2019 13:47:22", "10/30/2019 13:47:25",
"10/30/2019 13:47:26", "10/30/2019 13:47:31", "10/30/2019 13:47:32",
"10/30/2019 13:47:34", "10/30/2019 13:47:35", "10/30/2019 13:47:39",
"10/30/2019 13:47:41", "10/30/2019 13:47:48", "10/30/2019 13:47:49",
"10/30/2019 13:47:52", "10/30/2019 13:47:53", "10/30/2019 13:48:04",
"10/30/2019 13:48:15", "10/30/2019 13:48:16", "10/30/2019 13:48:18",
"10/30/2019 13:48:44", "10/30/2019 13:48:45", "10/30/2019 13:48:46",
"10/30/2019 13:48:49", "10/30/2019 13:48:59", "10/30/2019 13:49:06",
"10/30/2019 13:49:11", "10/30/2019 13:49:17", "10/30/2019 13:49:20",
"10/30/2019 13:49:30", "10/30/2019 13:49:33", "10/30/2019 13:49:35",
"10/30/2019 13:49:37", "10/30/2019 13:49:44", "10/30/2019 13:50:36",
"10/31/2019 13:25:44", "10/31/2019 15:45:07", "10/31/2019 15:45:32",
"10/31/2019 15:46:52", "10/31/2019 15:46:55", "10/31/2019 15:47:00",
"10/31/2019 15:47:17", "10/31/2019 15:47:44", "10/31/2019 15:48:12",
"10/31/2019 15:48:18", "10/31/2019 15:48:25", "10/31/2019 15:48:27",
"10/31/2019 15:48:28", "10/31/2019 15:48:32", "10/31/2019 15:48:34",
"10/31/2019 15:48:37", "10/31/2019 15:48:39", "10/31/2019 15:48:40",
"10/31/2019 15:48:41", "10/31/2019 15:48:55", "10/31/2019 15:48:56",
"10/31/2019 15:48:59", "10/31/2019 15:49:01", "10/31/2019 15:49:06",
"10/31/2019 15:49:07", "10/31/2019 15:49:10", "10/31/2019 15:49:15",
"10/31/2019 15:49:17", "10/31/2019 15:49:23", "10/31/2019 15:49:24",
"10/31/2019 15:49:25", "10/31/2019 15:49:28", "10/31/2019 15:49:39",
"10/31/2019 15:49:51", "10/31/2019 15:49:53", "10/31/2019 15:49:59",
"10/31/2019 15:50:20", "10/31/2019 15:50:21", "10/31/2019 15:50:23",
"10/31/2019 15:50:24", "10/31/2019 15:50:34", "10/31/2019 15:50:41",
"10/31/2019 15:50:47", "10/31/2019 15:50:53", "10/31/2019 15:50:55",
"10/31/2019 15:51:07", "10/31/2019 15:51:09", "10/31/2019 15:51:10",
"10/31/2019 15:51:18", "10/31/2019 15:51:19", "10/31/2019 15:51:31",
"10/31/2019 15:52:00", "10/31/2019 17:02:57", "10/31/2019 17:03:10",
"10/31/2019 17:03:15", "10/31/2019 17:03:19", "10/31/2019 17:03:35",
"10/31/2019 17:04:03", "10/31/2019 17:04:29", "10/31/2019 17:04:38",
"10/31/2019 17:04:46", "10/31/2019 17:04:49", "10/31/2019 17:04:51",
"10/31/2019 17:04:53", "10/31/2019 17:04:55", "10/31/2019 17:04:58",
"10/31/2019 17:05:00", "10/31/2019 17:05:01", "10/31/2019 17:05:02",
"10/31/2019 17:05:14", "10/31/2019 17:05:17", "10/31/2019 17:05:20",
"10/31/2019 17:05:21", "10/31/2019 17:05:25", "10/31/2019 17:05:26",
"10/31/2019 17:05:29", "10/31/2019 17:05:33", "10/31/2019 17:05:34",
"10/31/2019 17:05:35", "10/31/2019 17:05:41", "10/31/2019 17:05:44",
"10/31/2019 17:05:47", "10/31/2019 17:05:58", "10/31/2019 17:06:00",
"10/31/2019 17:06:09", "10/31/2019 17:06:11", "10/31/2019 17:06:13",
"10/31/2019 17:06:39", "10/31/2019 17:06:40", "10/31/2019 17:06:41",
"10/31/2019 17:06:42", "10/31/2019 17:06:52", "10/31/2019 17:07:00",
"10/31/2019 17:07:06", "10/31/2019 17:07:07", "10/31/2019 17:07:12",
"10/31/2019 17:07:14", "10/31/2019 17:07:24", "10/31/2019 17:07:25",
"10/31/2019 17:07:30", "10/31/2019 17:07:37", "10/31/2019 17:07:39",
"10/31/2019 17:07:51", "10/31/2019 17:08:19", "10/31/2019 20:52:14",
"11/01/2019 17:37:53", "11/01/2019 17:37:56", "11/01/2019 17:38:14",
"11/01/2019 17:38:17", "11/01/2019 17:38:34", "11/01/2019 17:39:26",
"11/01/2019 17:39:34", "11/01/2019 17:39:41", "11/01/2019 17:39:46",
"11/01/2019 17:39:47", "11/01/2019 17:39:49", "11/01/2019 17:39:54",
"11/01/2019 17:40:04", "11/01/2019 17:40:12", "11/01/2019 17:40:16",
"11/01/2019 17:40:22", "11/01/2019 17:40:29", "11/01/2019 17:40:53",
"11/01/2019 17:41:01", "11/01/2019 17:41:25", "11/01/2019 17:41:49",
"11/01/2019 17:41:50", "11/01/2019 17:42:03", "11/01/2019 17:42:10",
"11/01/2019 17:42:16", "11/01/2019 17:42:17", "11/01/2019 17:42:19",
"11/01/2019 17:42:26", "11/01/2019 17:42:32", "11/01/2019 17:42:38",
"11/01/2019 17:42:45", "11/01/2019 17:43:27", "11/01/2019 23:01:41",
"11/01/2019 23:01:51", "11/01/2019 23:02:02", "11/01/2019 23:02:05",
"11/01/2019 23:02:16", "11/01/2019 23:02:23", "11/01/2019 23:02:25",
"11/01/2019 23:02:48", "11/01/2019 23:03:19", "11/01/2019 23:03:25",
"11/01/2019 23:03:32", "11/01/2019 23:03:34", "11/01/2019 23:03:35",
"11/01/2019 23:03:40", "11/01/2019 23:03:45", "11/01/2019 23:03:49",
"11/01/2019 23:03:50", "11/01/2019 23:04:12", "11/01/2019 23:04:17",
"11/01/2019 23:04:20", "11/01/2019 23:04:36", "11/01/2019 23:04:45",
"11/01/2019 23:04:59", "11/01/2019 23:05:24", "11/01/2019 23:05:26",
"11/01/2019 23:05:27", "11/01/2019 23:05:40", "11/01/2019 23:05:47",
"11/01/2019 23:05:57", "11/01/2019 23:05:59", "11/01/2019 23:06:15",
"11/01/2019 23:06:16", "11/01/2019 23:06:26", "11/01/2019 23:06:38",
"11/01/2019 23:07:04"), class = "factor"), time = structure(c(59L,
10L, 192L, 217L, 255L, 161L, 72L, 199L, 22L, 123L, 225L, 262L,
169L, 35L, 204L, 36L, 86L, 86L, 136L, 137L, 205L, 268L, 234L,
269L, 235L, 174L, 175L, 275L, 183L), .Label = c("13:25:44", "13:43:32",
"13:45:16", "13:45:20", "13:45:24", "13:45:41", "13:46:10", "13:46:35",
"13:46:44", "13:46:51", "13:46:52", "13:46:53", "13:46:58", "13:47:03",
"13:47:06", "13:47:08", "13:47:19", "13:47:22", "13:47:25", "13:47:26",
"13:47:31", "13:47:32", "13:47:34", "13:47:35", "13:47:39", "13:47:41",
"13:47:48", "13:47:49", "13:47:52", "13:47:53", "13:48:04", "13:48:15",
"13:48:16", "13:48:18", "13:48:44", "13:48:45", "13:48:46", "13:48:49",
"13:48:59", "13:49:06", "13:49:11", "13:49:17", "13:49:20", "13:49:30",
"13:49:33", "13:49:35", "13:49:37", "13:49:44", "13:50:36", "15:45:07",
"15:45:32", "15:46:52", "15:46:55", "15:47:00", "15:47:17", "15:47:44",
"15:48:12", "15:48:18", "15:48:25", "15:48:27", "15:48:28", "15:48:32",
"15:48:34", "15:48:37", "15:48:39", "15:48:40", "15:48:41", "15:48:55",
"15:48:56", "15:48:59", "15:49:01", "15:49:06", "15:49:07", "15:49:10",
"15:49:15", "15:49:17", "15:49:23", "15:49:24", "15:49:25", "15:49:28",
"15:49:39", "15:49:51", "15:49:53", "15:49:59", "15:50:20", "15:50:21",
"15:50:23", "15:50:24", "15:50:34", "15:50:41", "15:50:47", "15:50:53",
"15:50:55", "15:51:07", "15:51:09", "15:51:10", "15:51:18", "15:51:19",
"15:51:31", "15:52:00", "17:02:57", "17:03:10", "17:03:15", "17:03:19",
"17:03:35", "17:04:03", "17:04:29", "17:04:38", "17:04:46", "17:04:49",
"17:04:51", "17:04:53", "17:04:55", "17:04:58", "17:05:00", "17:05:01",
"17:05:02", "17:05:14", "17:05:17", "17:05:20", "17:05:21", "17:05:25",
"17:05:26", "17:05:29", "17:05:33", "17:05:34", "17:05:35", "17:05:41",
"17:05:44", "17:05:47", "17:05:58", "17:06:00", "17:06:09", "17:06:11",
"17:06:13", "17:06:39", "17:06:40", "17:06:41", "17:06:42", "17:06:52",
"17:07:00", "17:07:06", "17:07:07", "17:07:12", "17:07:14", "17:07:24",
"17:07:25", "17:07:30", "17:07:37", "17:07:39", "17:07:51", "17:08:19",
"17:35:21", "17:37:53", "17:37:56", "17:38:14", "17:38:17", "17:38:34",
"17:39:26", "17:39:34", "17:39:41", "17:39:46", "17:39:47", "17:39:49",
"17:39:54", "17:40:04", "17:40:12", "17:40:16", "17:40:22", "17:40:29",
"17:40:53", "17:41:01", "17:41:25", "17:41:49", "17:41:50", "17:42:03",
"17:42:10", "17:42:16", "17:42:17", "17:42:19", "17:42:26", "17:42:32",
"17:42:38", "17:42:45", "17:43:27", "18:01:49", "18:01:53", "18:02:01",
"18:02:14", "18:03:09", "18:03:18", "18:03:25", "18:03:30", "18:03:32",
"18:03:38", "18:03:41", "18:04:00", "18:04:03", "18:04:04", "18:04:32",
"18:04:33", "18:04:50", "18:05:17", "18:05:18", "18:05:20", "18:05:38",
"18:05:39", "18:05:45", "18:54:24", "18:54:30", "18:54:39", "18:54:45",
"18:55:01", "18:55:29", "18:55:55", "18:56:04", "18:56:11", "18:56:12",
"18:56:18", "18:56:23", "18:56:26", "18:56:39", "18:56:45", "18:56:47",
"18:56:50", "18:56:55", "18:56:59", "18:57:10", "18:57:12", "18:57:14",
"18:57:23", "18:57:36", "18:57:37", "18:58:05", "18:58:06", "18:58:19",
"18:58:25", "18:58:37", "18:58:39", "18:58:52", "18:58:54", "18:59:17",
"18:59:41", "20:52:14", "23:01:41", "23:01:51", "23:02:02", "23:02:05",
"23:02:16", "23:02:23", "23:02:25", "23:02:48", "23:03:19", "23:03:25",
"23:03:32", "23:03:34", "23:03:35", "23:03:40", "23:03:45", "23:03:49",
"23:03:50", "23:04:12", "23:04:17", "23:04:20", "23:04:36", "23:04:45",
"23:04:59", "23:05:24", "23:05:26", "23:05:27", "23:05:40", "23:05:47",
"23:05:57", "23:05:59", "23:06:15", "23:06:16", "23:06:26", "23:06:38",
"23:07:04"), class = "factor"), study_name = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Collecting Data At The Panama Fruit Feeder", class = "factor"),
observation_name = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("activity", "session_status",
"species"), class = "factor"), observation_value = structure(c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("Aggressive behavior",
"Chestnut-headed Oropendola", "Clay-colored Thrush", "Gray-cowled Wood-Rail",
"Gray-headed Chachalaca", "Rufous Motmot", "start", "stop"
), class = "factor"), delete_flag = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), time_video = c(93, 95, 96, 101, 101, 105, 134,
135, 136, 136, 140, 141, 146, 208, 209, 209, 209, 209, 209,
210, 211, 213, 215, 215, 216, 233, 234, 264, 282), date_time = structure(c(1572551305,
1572457611, 1572386605, 1572389771, 1572663812, 1572644381,
1572551346, 1572386644, 1572457652, 1572555926, 1572389810,
1572663852, 1572644422, 1572457724, 1572386718, 1572457725,
1572551421, 1572551421, 1572555999, 1572556000, 1572386720,
1572663924, 1572389885, 1572663926, 1572389886, 1572644509,
1572644510, 1572663975, 1572644558), class = c("POSIXct",
"POSIXt"), tzone = ""), date = structure(c(18200, 18199,
18198, 18198, 18202, 18201, 18200, 18198, 18199, 18200, 18198,
18202, 18201, 18199, 18198, 18199, 18200, 18200, 18200, 18200,
18198, 18202, 18198, 18202, 18198, 18201, 18201, 18202, 18201
), class = "Date"), dummy_value = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1)), row.names = c(160L, 69L, 8L, 34L, 267L, 121L,
174L, 15L, 84L, 229L, 42L, 275L, 131L, 97L, 21L, 98L, 188L, 189L,
242L, 243L, 22L, 281L, 51L, 282L, 52L, 139L, 141L, 288L, 148L
), class = "data.frame")
# `subset2` is the time-ordered copy of the `subset` data frame defined above.
subset2 <- subset[order(subset$time_video), ]

# Minimum gap (in time_video units) between consecutive observations that
# starts a new group. In this data the only gap above 60 is 146 -> 208, so a
# threshold of 60 yields two groups; a smaller value splits the observations
# into more groups.
gap_threshold <- 60

v <- subset2 %>%
  dplyr::arrange(time_video) # can just do by time_video and not date because right now the video is canned

# Lump observations into groups: walk through the rows in time order and open
# a new group every time the gap to the previous row exceeds `gap_threshold`.
v2 <- v %>%
  mutate(
    # gap to the previous observation; the first row is compared with itself,
    # so its gap is 0
    time_since_last = time_video - lag(time_video, default = first(time_video)),
    # running count of "large gap" rows gives a group id that starts at 1
    group = 1 + cumsum(time_since_last > gap_threshold)
  ) %>%
  group_by(group) %>%
  summarize(
    first = min(time_video), # or first(date.time) if sorted
    last = max(time_video), # or last(date.time) if sorted
    count = n()
  ) %>%
  # span covered by each group, from its first to its last observation
  mutate(time = last - first)
# Plot one rectangle per lumped group in `v2`: it spans the group's first to
# last observation time on the x axis and rises to the group's observation
# count on the y axis.
ggplot(data = v2) +
  geom_rect(
    aes(xmin = first, xmax = last, ymin = 0, ymax = count),
    fill = "steelblue",
    color = "steelblue",
    alpha = 0.8
  ) + # >60 secs
  # write each group's count just above the left edge of its rectangle
  geom_text(
    aes(x = first, y = count, label = count),
    color = "steelblue",
    size = 5,
    hjust = 0.3,
    vjust = -0.4
  ) +
  labs(x = "Time", y = "Number of Observations") +
  theme_minimal() +
  theme(text = element_text(size = 20))