I am hoping to get some help with:
- Identifying the location of missing observations when no NA values are present (the entire row is missing).
- Creating a row for each missing observation, filled with mean values computed within certain categories.
My df:
Numerical load data from hockey players during practices over the season
Occasionally the accelerometers do not work during practice even though the player practiced. To make sure we can still track the work they did during practice, I would like to insert the mean value for their position (forward, defense, or goalie) from that practice (i.e., if a goalie’s accelerometer does not work, I would like to take the average loads of the other goalies and insert them into that player’s row for the practice).
- This would be a simpler task if there were rows for ALL players at each practice, with NA values for loads when the accelerometer doesn’t work. BUT when I download the data as a csv from the online cloud, there are only rows for players who had working units, so that part is out of my control.
> head(DummyLoads)
Name Date Load Load.Min Position
1 Jim 2019-10-19 900 2.100 Forward
2 Bob 2019-10-19 900 2.100 Forward
3 Dave 2019-10-19 900 2.100 Forward
4 Steve 2019-10-19 850 2.312 Forward
5 Fred 2019-10-19 850 2.312 Defense
6 Ray 2019-10-19 850 2.312 Defense
# Reproducible dummy data: one row per player per practice date for which a
# load was recorded. A player without a reading on a date has no row at all
# for that date (rather than a row holding NA values).
DummyLoads <- data.frame(
  Name = factor(
    c(
      "Jim", "Bob", "Dave", "Steve", "Fred", "Ray", # 2019-10-19
      "Jim", "Bob", "Dave", "Fred", "Ray",          # 2019-10-20
      "Jim", "Bob", "Dave", "Steve", "Fred", "Ray", # 2019-10-21
      "Dave", "Steve", "Fred", "Ray"                # 2019-10-22
    ),
    levels = c("Bob", "Dave", "Fred", "Jim", "Ray", "Steve")
  ),
  # Date is kept as a factor (not a Date) to match the original dump.
  Date = factor(
    rep(
      c("2019-10-19", "2019-10-20", "2019-10-21", "2019-10-22"),
      times = c(6L, 5L, 6L, 4L)
    ),
    levels = c("2019-10-19", "2019-10-20", "2019-10-21", "2019-10-22")
  ),
  Load = c(
    900L, 900L, 900L, 850L, 850L, 850L,
    789L, 789L, 789L, 960L, 960L,
    909L, 909L, 909L, 991L, 991L, 991L,
    720L, 717L, 717L, 717L
  ),
  Load.Min = c(
    2.1, 2.1, 2.1, 2.312, 2.312, 2.312,
    2.22, 2.22, 2.22, 2, 2,
    1.88, 1.88, 1.88, 1.99, 1.99, 1.99,
    2.1, 2.3, 2.3, 2.3
  ),
  # Within each date the forwards are listed first, then the defense.
  Position = factor(
    rep(
      rep(c("Forward", "Defense"), times = 4L),
      times = c(4L, 2L, 3L, 2L, 4L, 2L, 2L, 2L)
    ),
    levels = c("Defense", "Forward")
  )
)
# Bar chart of each player's load, one facet column per practice date.
# A player with no row for a date simply has no bar in that facet.
ggplot(DummyLoads, aes(x = Name, y = Load, fill = Position)) +
  geom_col() +
  facet_grid(. ~ Date)
Here is a chart showing players with missing data.
Ideally, I would like to be able to identify those missing data points without having to plot the data first. I also want to avoid having to manually calculate the means each time and then enter them. I am hoping to find an automated solution because I will have a full season of practices to do this with, but I understand that might be tricky!
Thank you in advance for any suggestions. I apologize if I didn’t explain things clearly.
Actual df:
# Actual data, in what looks like dput() form. The expression is not assigned
# to a name here; assign it (e.g. `df <- structure(...)`) before using it.
#
# Shape: a grouped tibble (class "grouped_df") with 103 rows and the columns
#   Athlete    - factor with 28 levels (player surnames)
#   Date       - class "Date"
#   Position   - factor with levels "Defense", "Forward", "Goalie"
#   PL_Avg     - numeric
#   PL_Min_Avg - numeric
# The trailing `groups` attribute records grouping by Athlete and Date; its
# `.rows` list maps each of the 103 groups to exactly one row.
structure(list(Athlete = structure(c(1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L,
14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L,
18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L, 21L,
21L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 25L,
25L, 25L, 25L, 26L, 26L, 26L, 26L, 27L, 27L, 27L, 27L, 28L, 28L,
28L), .Label = c("Agosta", "Ambrose", "Bach", "Bettez", "Clark",
"Daoust", "Desbiens", "Eldridge", "Fast", "Fortino", "Gabel",
"Jenner", "Johnston", "Lacasse", "Lacquette", "Larocque", "Leslie",
"Maschmeyer", "Mikkelson", "Nurse", "Poulin", "Pozzebon", "Rattray",
"Rougeau", "Saulnier", "Stacey", "Tiley", "Turnbull"), class = "factor"),
# Dates are stored as day counts since 1970-01-01; the values 18169-18172
# correspond to 2019-09-30 through 2019-10-03. Athletes have 3 or 4 rows each,
# i.e. some athletes have no row for some of these dates.
Date = structure(c(18170, 18171, 18172, 18169, 18170, 18171,
18172, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169,
18170, 18171, 18172, 18169, 18170, 18171, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169,
18170, 18171, 18172, 18169, 18170, 18171, 18172, 18170, 18171,
18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18170, 18171, 18172, 18170, 18171, 18172, 18169, 18170, 18171,
18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18170,
18171, 18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171,
18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18170, 18171, 18172), class = "Date"),
# Position codes index into the labels: 1 = Defense, 2 = Forward, 3 = Goalie.
Position = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 2L, 2L, 2L), .Label = c("Defense", "Forward", "Goalie"
), class = "factor"), PL_Avg = c(178.5, 123, 174, 191, 163.5,
169, 155, 158.5, 158.5, 172, 214, 169, 220.5, 175, 235, 191.5,
217.5, 145, 217, 184.5, 181, 135, 68, 104, 190, 127.5, 129,
136, 210, 194, 208.5, 168, 220, 189.5, 213, 192, 180, 204,
167.5, 187, 178, 196.5, 151, 204, 200, 170.5, 165, 107, 116,
100.5, 88, 189.5, 131.5, 178, 176, 200.5, 157, 315, 174,
182.5, 168, 100, 130.5, 114.5, 75, 205, 154.5, 198, 175,
163, 160.5, 200, 149, 185, 154, 149.5, 182, 177, 192, 161,
287, 179.5, 166, 157, 188, 163.5, 171.5, 153, 220, 111.5,
197, 110, 214, 188.5, 184, 173, 100, 103, 99, 64, 173.5,
137, 143), PL_Min_Avg = c(2.41, 2.68, 2.46, 2.21, 2.205,
2.4, 2.19, 2.075, 2.185, 2.08, 2.46, 2.22, 2.615, 2.48, 2.7,
2.5, 2.57, 2.06, 2.5, 2.42, 2.505, 1.515, 1.43, 1.75, 1.67,
1.675, 1.785, 1.92, 2.42, 2.645, 2.485, 2.38, 2.54, 2.6,
2.5, 2.72, 2.08, 2.56, 2.315, 2.27, 2.33, 2.325, 2.13, 2.36,
2.28, 2.355, 2.33, 1.38, 1.3, 1.19, 1.47, 2.56, 2.33, 2.52,
2.385, 2.39, 2.23, 2.58, 2.28, 2.525, 2.38, 1.41, 1.465,
1.325, 1.82, 2.36, 2.58, 2.38, 2.48, 1.88, 2.115, 2.37, 2.11,
2.13, 2.02, 2.075, 1.8, 2.395, 2.29, 2.27, 2.35, 2.345, 2.29,
2.23, 2.16, 2.225, 2.02, 2.17, 2.53, 2.115, 2.72, 2.16, 2.47,
2.465, 2.555, 2.45, 1.29, 1.145, 1.145, 1.57, 1.97, 1.82,
2.03)), row.names = c(NA, -103L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(Athlete = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L,
17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L,
20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 24L,
24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 27L, 27L,
27L, 27L, 28L, 28L, 28L), .Label = c("Agosta", "Ambrose", "Bach",
"Bettez", "Clark", "Daoust", "Desbiens", "Eldridge", "Fast",
"Fortino", "Gabel", "Jenner", "Johnston", "Lacasse", "Lacquette",
"Larocque", "Leslie", "Maschmeyer", "Mikkelson", "Nurse", "Poulin",
"Pozzebon", "Rattray", "Rougeau", "Saulnier", "Stacey", "Tiley",
"Turnbull"), class = "factor"), Date = structure(c(18170, 18171,
18172, 18169, 18170, 18171, 18172, 18170, 18171, 18172, 18169,
18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169, 18170,
18171, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169,
18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169, 18170,
18171, 18172, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18170, 18171, 18172, 18170, 18171,
18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18169,
18170, 18171, 18172, 18170, 18171, 18172, 18169, 18170, 18171,
18172, 18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172,
18169, 18170, 18171, 18172, 18169, 18170, 18171, 18172, 18170,
18171, 18172), class = "Date"), .rows = list(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L,
66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L,
78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L,
90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L,
102L, 103L)), row.names = c(NA, -103L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))