I have some data (the iris dataset) and a plotting function called `decisionplot`. My question is: how can I apply the `decisionplot` function (which outputs a `ggplot` plot) to all pairwise combinations of the variables in my data? That is, in the iris data set we have:
"Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"
I want to apply the function `decisionplot` to the following combinations:
("Sepal.Length", "Sepal.Width"), ("Sepal.Length", "Petal.Length"), ("Sepal.Length", "Petal.Width"), ("Sepal.Width", "Petal.Length"), ("Sepal.Width", "Petal.Width"), ("Petal.Length", "Petal.Width")
I then want to store the plots for these combinations in a list, so that I can arrange the different outputs on a grid.
The data and the `decisionplot` function are the following:
# Two-class version of iris: drop setosa and recode the response as
# 1 for virginica and 0 for versicolor.
data(iris)
df <- iris %>%
  dplyr::filter(Species != "setosa") %>%
  dplyr::mutate(Species = as.integer(Species == "virginica"))
#' Plot the decision regions of a two-predictor classifier
#'
#' Predicts the class over a regular grid spanning the first two columns of
#' `data`, draws the predicted regions and the boundary between them, and
#' overlays the observations coloured by their observed class.
#'
#' @param model Fitted model with a `predict()` method.
#' @param data Data frame whose first two columns are the numeric predictors;
#'   the prediction grid reuses their column names.
#' @param class Name of the column of `data` holding the observed class.
#'   `NULL` falls back to a column called `"Yvar"`.
#' @param model_type Label reported in the progress message; `"xgboost"`
#'   additionally switches to the xgboost prediction path.
#' @param predict_type Passed to `predict()` as `type`.
#' @param resolution Number of grid points along each axis.
#' @param main Plot title.
#' @param showgrid Currently unused.
#' @param ... Currently unused.
#' @param xlab,ylab Axis titles. When `NULL`, the name of the corresponding
#'   column of `data` is used; for the legacy placeholder columns `Xvar1` /
#'   `Xvar2` the variables `var1_to_plot` / `var2_to_plot` are used instead
#'   if they exist.
#' @return A ggplot object.
decisionplot <- function(model, data, class = NULL, model_type = "NA",
                         predict_type = "class", resolution = 300, main,
                         showgrid = TRUE, ..., xlab = NULL, ylab = NULL) {
  if (ncol(data) < 2L) {
    stop("`data` must have at least two predictor columns.", call. = FALSE)
  }
  class_col <- if (is.null(class)) "Yvar" else class
  if (!class_col %in% names(data)) {
    stop("Column `", class_col, "` not found in `data`.", call. = FALSE)
  }

  # Axis title for one predictor: the column name, except that the legacy
  # placeholder names defer to the like-named variable when one is defined.
  default_label <- function(column, placeholder, legacy_var) {
    if (identical(column, placeholder) &&
        exists(legacy_var, mode = "character")) {
      get(legacy_var, mode = "character")
    } else {
      column
    }
  }
  if (is.null(xlab)) {
    xlab <- default_label(names(data)[1], "Xvar1", "var1_to_plot")
  }
  if (is.null(ylab)) {
    ylab <- default_label(names(data)[2], "Xvar2", "var2_to_plot")
  }

  # make grid
  r <- vapply(data[, 1:2], range, numeric(2), na.rm = TRUE)
  xs <- seq(r[1, 1], r[2, 1], length.out = resolution)
  ys <- seq(r[1, 2], r[2, 2], length.out = resolution)
  g <- data.frame(rep(xs, each = resolution), rep(ys, times = resolution))
  # The grid must carry the predictor names so predict() can match them.
  names(g) <- colnames(r)

  ### guess how to get class labels from predict
  ### (unfortunately not very consistent between models)
  message(paste0("Running Model ", model_type))
  if (model_type == "xgboost") {
    p <- predict(
      object = model,
      newdata = xgboost::xgb.DMatrix(data = as.matrix(g)),
      type = predict_type
    )
    p <- ifelse(p > 0.50, "Bankrupt", "Non-Bankrupt")
  } else {
    p <- predict(model, g, type = predict_type)
  }
  if (is.list(p)) p <- p$class
  if (is.logical(p)) p <- ifelse(p == TRUE, "Non-Bankrupt", "Bankrupt")
  p <- as.factor(p)

  # Numeric coding of the predicted label, used only to trace the boundary.
  grid_data <- data.frame(
    X1 = g[[1]],
    X2 = g[[2]],
    Y = p,
    class_num = match(as.character(p), c("Non-Bankrupt", "Bankrupt"))
  )
  # Observations, taken by position (predictors) and by name (class) so the
  # caller's column names do not matter.
  obs_data <- data.frame(
    X1 = data[[1]],
    X2 = data[[2]],
    Y = factor(data[[class_col]])
  )

  ggplot2::ggplot() +
    ggplot2::geom_point(
      ggplot2::aes(x = X1, y = X2, colour = Y),
      data = grid_data
    ) +
    ggplot2::geom_contour(
      ggplot2::aes(x = X1, y = X2, z = class_num),
      bins = 2,
      data = grid_data,
      size = 0.001
    ) +
    ggplot2::geom_point(
      ggplot2::aes(x = X1, y = X2, colour = Y),
      size = 3,
      data = obs_data
    ) +
    # Hollow circles outline the observations against the filled regions.
    ggplot2::geom_point(
      ggplot2::aes(x = X1, y = X2),
      size = 3,
      shape = 1,
      data = obs_data
    ) +
    ggplot2::labs(title = paste(main), x = xlab, y = ylab)
}
I then set the variables I want to put through the function and create my `boundary_data`:
# The pair of predictors to plot.
var1_to_plot <- "Sepal.Length"
var2_to_plot <- "Sepal.Width"

# Two predictors plus the response as a factor, under the generic column
# names Xvar1 / Xvar2 / Yvar.
boundary_data <- df %>%
  dplyr::select(dplyr::all_of(c(var1_to_plot, var2_to_plot)), Species) %>%
  dplyr::mutate(Species = factor(Species)) %>%
  stats::setNames(c("Xvar1", "Xvar2", "Yvar")) %>%
  data.frame()
Finally, I create my logistic model and run the `decisionplot` function.
# Logistic regression of the class on both predictors. The extra "lr" class
# lets predict() dispatch to a dedicated method for this model.
model <- glm(
  Yvar ~ .,
  data = boundary_data,
  family = binomial(link = "logit")
)
class(model) <- append(class(model), "lr", after = 0)
# predict() method for "lr" models: TRUE where the fitted response
# probability exceeds 0.5. Arguments passed through `...` are ignored.
predict.lr <- function(object, newdata, ...) {
  prob <- predict.glm(object, newdata, type = "response")
  prob > .5
}
# Draw the decision regions of the logistic model for the selected pair.
decisionplot(
  model,
  boundary_data,
  class = "Yvar",
  model_type = "Logistic",
  predict_type = "response",
  main = "Logistic Regression"
)
This gives me:
However, I want this to be just one of the plots for all the combinations of the variables (since here it only considers `Sepal.Width` and `Sepal.Length`).
How can I plot the different `ggplot` variable combinations on a grid? Or store the plots as a list?
EDIT
What I currently have is the following, which gives me 12 lists, one for each ordered pair of variables. Each list contains two further elements: a data set and a logistic model.
# All ordered pairs of distinct predictor columns (12 rows for 4 predictors).
# The names are kept as character strings rather than expand.grid()'s default
# factors, so that each value can be used directly as a column selector.
var_combos <- expand.grid(
  Var1 = names(df)[1:4],
  Var2 = names(df)[1:4],
  stringsAsFactors = FALSE
) %>%
  dplyr::filter(Var1 != Var2)
#' Build the two-predictor data set and logistic model for one variable pair
#'
#' @param dat Data frame holding the predictor columns.
#' @param V1,V2 The two predictor columns, each given as a column name
#'   (character or factor) or as a column position.
#' @param Y Response: either a vector with one value per row of `dat`, or the
#'   name of a column of `dat`.
#' @return An unnamed list. Element 1 is the data used for the fit: the two
#'   predictor columns plus the response as a factor column named `Y`.
#'   Element 2 is the fitted binomial `glm` with class `"lr"` prepended.
iter_function <- function(dat, V1, V2, Y) {
  # Turn a name, factor or position into a single existing column name.
  resolve_column <- function(col) {
    name <- if (is.numeric(col)) names(dat)[col] else as.character(col)
    if (length(name) != 1L || is.na(name) || !name %in% names(dat)) {
      stop("`V1` and `V2` must each identify one column of `dat`.",
           call. = FALSE)
    }
    name
  }
  first <- resolve_column(V1)
  second <- resolve_column(V2)

  response <- if (is.character(Y) && length(Y) == 1L && Y %in% names(dat)) {
    dat[[Y]]
  } else {
    Y
  }
  if (length(response) != nrow(dat)) {
    stop("`Y` must be a column name or have one value per row of `dat`.",
         call. = FALSE)
  }

  # Keep only the two predictors and the response, so that `Y ~ .` expands to
  # exactly those two predictors.
  data <- dat[, c(first, second), drop = FALSE]
  data$Y <- factor(response)

  # Logistic Model
  model <- stats::glm(Y ~ ., data = data,
                      family = stats::binomial(link = "logit"))
  # No predict method is defined here: one created inside this function would
  # be local to it and would never be seen by predict() calls made elsewhere.
  class(model) <- c("lr", class(model))

  list(data, model)
}
# One list(data, model) per row of `var_combos`, always using the Species
# column of `df` as the response.
boundary_lists <- purrr::map2(
  var_combos$Var1,
  var_combos$Var2,
  function(first_var, second_var) {
    iter_function(dat = df, V1 = first_var, V2 = second_var, Y = df$Species)
  }
)
Now I want to apply each of these lists to the `decisionplot` function. I keep getting `. not found`.
# One plot per fitted pair, stored as a list. `.` only exists inside a
# magrittr pipe, so the list is passed to map() explicitly. Each element of
# `boundary_lists` is list(data, model): the data is element 1, the model is
# element 2, and the response column created by iter_function() is "Y".
decision_plots <- purrr::map(
  boundary_lists,
  function(fit) {
    decisionplot(
      model = fit[[2]],
      data = fit[[1]],
      predict_type = "response",
      class = "Y",
      model_type = "Logistic",
      main = "Logistic Regression"
    )
  }
)
How can I map the function over a list of lists? (I realise I am not giving it the `boundary_lists` list created by `iter_function()`.)