I am trying to reduce a full graph so that it only shows the strongest links.
To do that I am doing this:
- Multiply the original matrix by -1 (the values are in the interval [0,1])
- Obtain the minimum spanning tree, step 1. makes it a spanning tree with the strongest links
- Paste other strong links that are not a part of the spanning tree until the average number of links per node is <= 5.
My function for this goal is:
# number of links
n <- 10
n2 <- sqrt(n)
library(igraph)
set.seed(1234)
mat <- matrix(rnorm(n, 1, 0.5), nrow = n2, ncol = n2)
mat[mat < 0] <- 0
colnames(mat) <- letters[1:n2]
rownames(mat) <- letters[1:n2]
diag(mat) <- 1
trim_network_1 <- function(mat, avg_links = 5, tolerance = 0.01) {
mat <- (-1) * mat
g <- graph_from_adjacency_matrix(mat, weighted = TRUE, mode = "undirected", diag = FALSE)
g_mst <- mst(g, algorithm = "prim")
threshold <- 0
avg_links_n <- FALSE
while (avg_links_n == FALSE) {
if (threshold < 1) {
message(sprintf("%s threshold...", threshold))
g_not_in_mst <- delete.edges(g, which(abs(E(g)$weight) <= threshold))
g_not_in_mst <- graph.difference(g_not_in_mst, g_mst)
g <- rbind(
as_data_frame(g_mst),
as_data_frame(g_not_in_mst)
)
g <- graph_from_data_frame(g, directed = F)
avg_links_n <- ifelse(mean(degree(g)) <= avg_links, TRUE, FALSE)
threshold <- threshold + tolerance
if (avg_links_n == TRUE) {
message(sprintf("%s threshold achieves the avg number of connections", threshold))
E(g)$weight <- (-1) * E(g)$weight
return(g)
}
} else {
warning("no threshold achieves the avg number of connections\nreturning maximum spanning tree")
avg_links_n <- TRUE
E(g_mst)$weight <- (-1) * E(g_mst)$weight
return(g_mst)
}
}
}
trim_network_2 <- function(mat, avg_links = 5, tolerance = 0.01) {
mat <- (-1) * mat
g <- graph_from_adjacency_matrix(mat, weighted = TRUE, mode = "undirected", diag = FALSE)
g_mst <- mst(g, algorithm = "prim")
threshold <- 0
avg_links_n <- FALSE
while (avg_links_n == FALSE) {
if (threshold < 1) {
message(sprintf("%s threshold...", threshold))
g_not_in_mst <- delete.edges(g, which(abs(E(g)$weight) <= threshold))
g_not_in_mst <- graph.difference(g_not_in_mst, g_mst)
g <- graph.union(g_mst, g_not_in_mst)
E(g)$weight <- pmin(E(g)$weight_1, E(g)$weight_2, na.rm = T)
g <- remove.edge.attribute(g, "weight_1")
g <- remove.edge.attribute(g, "weight_2")
avg_links_n <- ifelse(mean(degree(g)) <= avg_links, TRUE, FALSE)
threshold <- threshold + tolerance
if (avg_links_n == TRUE) {
message(sprintf("%s threshold achieves the avg number of connections", threshold))
E(g)$weight <- (-1) * E(g)$weight
return(g)
}
} else {
warning("no threshold achieves the avg number of connections\nreturning maximum spanning tree")
avg_links_n <- TRUE
E(g_mst)$weight <- (-1) * E(g_mst)$weight
return(g_mst)
}
}
}
g1 <- trim_network_1(mat)
g2 <- trim_network_2(mat)
g1 <- as_data_frame(g1)
g2 <- as_data_frame(g2)
g1w <- order(g1$weight)
g1 <- g1[g1w, ]
g2w <- order(g2$weight)
g2 <- g2[g2w, ]
# this is symmetric so sometimes A-B is returned as B-A
# all.equal(g1, g2) doens't always hold bc of symmetry
all.equal(g1$weight, g2$weight)
all.equal(nrow(g1), nrow(g2))
I tried with graph.union(g_mst, g_not_in_mst)
and even after doing E(g)$weight <- pmin(E(g)$weight_1, E(g)$weight_2, na.rm = T)
I cannot obtain the same result as with the rbind()
part.
Any ideas? an "ideal" result would be not to convert to data frame to then convert to graph again.