I am finding some issues with creating a scatterplot based on the latest available observation per each variable. I would like to take the latest obs per each variable and each country and then compute the scatterplot. I created a short example with random number to show that in my infrastructure some countries are missing due to missing observation in the df
table. The chart should take 2019q1 for NL and FR .
library(zoo)
library(ggplot2)
library(ggrepel)
library(data.table)
# scatterplot preparation
set.seed(123)
country <- c("AT", "BE", "NL", "DE", "FR", "IT", "ES", "PT", "AT", "BE", "NL", "DE", "FR", "IT", "ES", "PT")
year <- as.yearqtr(c("2019 Q1", "2019 Q1","2019 Q1", "2019 Q1", "2019 Q1", "2019 Q1", "2019 Q1", "2019 Q1", "2019 Q2", "2019 Q2", "2019 Q2", "2019 Q2", "2019 Q2", "2019 Q2", "2019 Q2", "2019 Q2"))
HPG <- runif(16, min=0, max=5)
HAR <- runif(16, min=-1, max=3)
HAR[c(11,13)] <- NA
df <- data.frame(country, year, HPG, HAR)
df <- as.data.table(df)
df2019q2 <- df[df$year == "2019 Q2"]
ggplot(data = df2019q2, aes(x = HAR, y = HPG, label = country)) +
geom_point(colour = "blue") +
geom_label_repel(aes(label = country),
box.padding = 0.35,
point.padding = 0.5,
segment.color = 'grey50') +
theme_bw() +
guides(linetype = FALSE, size = FALSE) +
scale_y_continuous(name = "HPG", breaks = scales::pretty_breaks(n = 10), limits = c(-6, 4)) +
scale_x_continuous(name = "HAR", breaks = scales::pretty_breaks(n = 10))