###################################################################################################################
# Analysis of Local Optima Networks (LONs)
# Copyright (c) 2018 Gabriela Ochoa and Nadarajen Veerapen.
# Case study: Number Partitioning Problem (NPP) fully enumerated LONs
# Associated Research Paper:
# Ochoa, Gabriela; Veerapen, Nadarajen; Daolio, Fabio; Tomassini, Marco.
# Understanding Phase Transitions with LONs: Number Partitioning as a Case Study.
# Evolutionary Computation in Combinatorial Optimization - EvoCOP 2017, pp. 233-248, 2017
#
# LON construction. Including Monotonic Edges and Compressed Plateaus
# Input: zip file containing Nodes and Edges as text files
# Output: 2D and 3D plots, printout of some relevant LON metrics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
# following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWAR
##########################################################################################################################
# Check if required packages are installed or not. Install if required
packages <- c("igraph", "rgl")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
install.packages(setdiff(packages, rownames(installed.packages())))
}
# Load required packages
library(igraph) # Network analysis and visualisation
library(rgl) # 3D plots
# Location of data files. Add here the path to your data files
path <- "" # insert the path to your files here
#path <- "/Users/gabriela/Dropbox/Networks/Tutorial/" # (Example)
setwd(path) # set working directory
# Number Partitioning Instances. N = size, k = phase transition parameter. Naming convention:
# npp10-0.4-10_2-0, N = 10, k = 0.4, rest of name indicates seed = 10 and escape edges with 2-flip.
# The two instances below are representative of before and after the Phase Transition. These are
# small instances that can be visualised completely without pruning. For larger instance, pruning
# might be required. Comment or uncomment to explore these two examples
instance <- "npp10-0.4-10_2-0" # Before the Phase Transition
#instance <- "npp10-1.0-10_2-0" # After the Phase Transition
# Default colours for node and edges
# LON model has 3 types of perturbation edges: (i)mprovement, (e)qual, (w)orsening
pi_ecol <- "gray50" # Opaque dark gray improvement edges
# alpha is for transparency: (as an opacity, 0 means fully transparent, max (255) opaque)
pe_ecol <- rgb(0, 0, 250, max = 255, alpha = 150) # transp. blue for neutral edges
pw_ecol <- rgb(0, 250, 0, max = 255, alpha = 150) # transp. green for worsening edges
# Node coloring for global and local sinks and other optima
colgs <- "#d7191c" # Color of global sinks (red)
colls <- "#2b83ba" # Color of local (i.e non-global) sinks (blue)
collo <- "gray65" # Color for all other optima (gray)
# Functions
# ----------------------------------------------------------------------------------------
# Width of edges based on their weight
edgeWidth<-function(N, minw, maxw) {
ewidth <- 1
if (ecount(N) > 0) {
ewidth <- (maxw * E(N)$weight)/max(E(N)$weight)
ewidth = ifelse(ewidth > minw, ewidth, minw)
ewidth = ifelse(ewidth < maxw, ewidth, maxw)
}
return (ewidth)
}
# ----------------------------------------------------------------------------------------
# Size of nodes based on their strength (weighted vertex degree)
nodeSizeStrength<-function(N, minsize, maxsize) {
vsize <- maxsize
if (ecount(N) > 0) {
vsize = 5* graph.strength(N, mode="in")
vsize = ifelse(vsize > minsize, vsize, minsize)
vsize = ifelse(vsize < maxsize, vsize, maxsize)
}
return (vsize)
}
#----------------------------------------------------------------------------------------------------------------------------
# Plot Network in 2D. Either in the screen or as pdf file (if bpdf is True)
# N: Graph object
# tit: String describing network, instance and type of LON
# ewidth: edge width
# asize: arrow size for plots
# ecurv: curvature of the edges (0 = non, 1 = max)
# mylay: graph layout as a parameter
# bpdf: boolean TRUE for generating a pdf
plotNet <-function(N, tit, nsize, ewidth, asize, ecurv, mylay, bpdf) {
inst <-strsplit(instance, "\\_")[[1]][1] # simplify instance name taking first part of name
if (bpdf) { # PDF for saving the 2D plot
ofname <- paste0(inst, tit, '.pdf')
pdf(ofname)
print(ofname)
}
vframecol <- ifelse(V(N)$fitness == best, "black", "gray50") # darker outline for global optima
title <- paste(inst, tit,'Nodes:',vcount(N), 'Edges:',ecount(N))
plot(N, layout = mylay, main = title, vertex.label = NA,
vertex.size = nsize, vertex.frame.color = vframecol,
edge.width = ewidth, edge.arrow.size = asize, edge.curved = ecurv)
if (bpdf)
dev.off()
}
#-----------------------------------------------------------------------------
# Plot network in 3D
# N = Network
# z: the z coordinate, normally fitness, but can have some scaling.
# ewidth: vector with edge widths
# asize: arrow size for plots
# mylayout: layout as a paremter so I can use the same for 2D and 3D
plotNet3D <-function(N, z, ewidth, asize, mylay) {
mylayout3D <- cbind(mylay, z) # append z coordinate to the 2D layout
rgl.open() # open new window
bg3d("white") # set background to white
rglplot(N, layout = mylayout3D, edge.width = ewidth, edge.arrow.size = asize, vertex.label = NA)
}
#------------------------------------------------------------------------
# Creates a sub-network with nodes with and below a given fitness level
# Keeping those nodes with a fitness below a percentile
# the higher the percentile the more nodes are kept.
# This function can be used to visualise large networks
pruneNodesFit <- function(N, perc) {
# subset network nodes below fvalue fitness
Top <- induced.subgraph(N, V(N)$fitness <= quantile(V(N)$fitness,perc))
return (simplify(Top))
}
## Main -----------------------------------------------------------------------------------
## Read data from zip file and construct the network models
print(instance)
zipname <- paste0(instance, ".zip")
nodename <- paste0(instance, ".nodes")
edgename <- paste0(instance, ".edges")
edges <- read.table(unz(zipname, edgename), header = F, colClasses = c("integer", "integer", "numeric"))
colnames(edges) <- c("start", "end", "weight")
nodes <- read.table(unz(zipname, nodename), header = F, colClasses = c("integer", "integer", "numeric"))
colnames(nodes) <- c("id", "fitness", "basin.size")
## Create LON from dataset of nodes and edges
LON <- graph_from_data_frame(d = edges, directed = T, vertices = nodes)
LON <- simplify(LON, remove.multiple=FALSE, remove.loops=TRUE) # Remove self loops
best <- min(nodes$fitness) # since we are minimising, and is full enumeration best is the minimum fitness
cat("Global Optimum Value:", best,"\n")
## Networks. 3 Models
# LON: Containing all local optima and edges
# MLON: LON with keeping only monotonic sequences (non-deteriorating edges)
# CMLON: MLON with compressed meta-plateaus used to identify sinks
# get the list of edges and fitness values in order to filter
el <- as_edgelist(LON)
fits <- V(LON)$fitness
names <- V(LON)$name
# get the fitness values at each endpoint of an edge
f1 <- fits[match(el[,1], names)]
f2 <- fits[match(el[,2], names)]
# Assuming a minimisation problem
E(LON)[which(f2f1)]$type <- "worsening" # worsening edges
E(LON)[which(f2==f1)]$type <- "equal" # equal fitness (neutral) edges
# Coloring edges according to type
E(LON)$color[E(LON)$type == "improving"] <- pi_ecol
E(LON)$color[E(LON)$type == "equal"] <- pe_ecol
E(LON)$color[E(LON)$type == "worsening"] <- pw_ecol
# Coloring nodes
V(LON)$color <- collo # default local optima
V(LON)$color[which(degree(LON,mode="out") == 0)] <- colls # local sinks
V(LON)$color[V(LON)$fitness == best] <- colgs # global sinks
# Default size of nodes in LON model is the basin size (fully enumerated)
V(LON)$size <- V(LON)$basin.size
## Monotonic LON keeps only the edges that correspond to non-deteriorating moves
## MLON inherits all the other properties from LON (nodes colours and size and edges colour)
MLON <- subgraph.edges(LON, which(E(LON)$type != "worsening"), delete.vertices = F) # Vertices are kept to maintain ID names
MLON <- simplify(MLON, remove.multiple=FALSE, remove.loops=TRUE) # Remove self-loops
## Constructing the CMLON. Contract meta-neutral-networks (pleteaus) to single nodes
mlon_sise <- V(MLON)$size # Keep original size as it will be modified to construct the CMLON
V(MLON)$size<-1 # required to construct the CMLON, size will be agregaterd
# check connectivity within meta-plateaus, only merge minima that are connected
gnn <- subgraph.edges(LON, which(f2==f1), delete.vertices=FALSE)
# get the components that are connected at the same fitness level
nn_memb <- components(gnn, mode="weak")$membership
# contract neutral connected components saving cardinality into node size
CMLON <- contract.vertices(MLON, mapping = nn_memb,
vertex.attr.comb = list(fitness = "first", size = "sum", "ignore"))
# The size of nodes is the aggregation/sum of nodes in the plateau
# remove self-loops and contract multiple edges
CMLON <- simplify(CMLON, edge.attr.comb = list(weight="sum"))
# identify sinks i.e nodes without outgoing edges
sinks_ids <-which(degree(CMLON, mode = "out")==0)
sinks_fit <- vertex_attr(CMLON, "fitness")[sinks_ids]
global_opt <- V(CMLON)[fitness == best]
## Add colour to CMLON nodes and edges.
V(CMLON)$color <- collo # default colour for local optima
V(CMLON)$color[V(CMLON) %in% sinks_ids] <- colls # Colour of suboptimal sinks
V(CMLON)$color[V(CMLON)$fitness == best] <- colgs # Colour of global sinks
E(CMLON)$color <- pi_ecol # edges are all improving in CMLON, so improving color used.
# Restoure MLON size, as it was modified to construct CMLON
V(MLON)$size <- mlon_sise
# Compute funnel metrics: number of local and global sinks
nsinks <- length(sinks_ids) # Number of sinks
nglobals <- length(global_opt) # Number of global sinks
# More funnel metrics
# Incoming strength of global optima sinks / normalised by the total incoming strength of sinks
igs<-sinks_ids[sinks_fit==best] # index of global sinks
ils<-sinks_ids[sinks_fit>best] # index of local sinks -- might be empty
sing <- sum(strength(graph = CMLON, vids = igs, mode = "in", loops = F), na.rm = T) # global sinks
sinl <- sum(strength(graph = CMLON, vids = ils, mode = "in", loops = F), na.rm = T) # local sinks
gstrength <- sing/(sing+sinl) # normalised incoming strength of global sinks
lstrength <- sinl/(sing+sinl) # normalised incoming strength of local sinks
#### METRICS ####
# Many metrics can be computed for networks, here we report some basic LON metrics
# and funnel metrics (CMLON model) known to relate to search difficulty
print("Metrics of the LON model")
cat("Number of nodes (local optima):", vcount(LON), "\n")
cat("Number of global optima:", length(V(LON)[fitness == best]), "\n")
print("Number of Edges:")
cat("Total:", ecount(LON), "\n")
cat("Improving:", length(E(LON)[type == "improving"]), "\n")
cat("Neutral:", length(E(LON)[type == "equal"]), "\n")
cat("Worsening:", length(E(LON)[type == "worsening"]), "\n")
print("Funnel Metrics ")
cat("Total number of sinks (funnels):", nsinks, "\n")
cat("Number of global sinks (funnels):", nglobals, "\n")
cat("Fitness of sinks:", sinks_fit, "\n")
cat("Normalised incoming strength of global sinks:", gstrength, "\n")
cat("Normalised incoming strength of sub-optimal (local) sinks:", lstrength, "\n")
#### VISUALISATION ####
# For large networks pruning might be required. Small networks can be visualised quickly.
# LON MODEL
lonlay <- layout.fruchterman.reingold(LON) # you can select a different different layot.
ew <- edgeWidth(LON, 0.1, 2.5) # set width of edges proportional weight
ns <- nodeSizeStrength(LON, 4, 15) # set size of nodes proportional to incoming strength
# Plot 2D LON with indicated parameter values. Both screen plot, and PDF file are produced
plotNet(N = LON, tit = "lon", nsize = ns, ewidth = ew, asize = 0.1, ecurv = 0.4, mylay = lonlay, bpdf = F)
plotNet(N = LON, tit = "lon", nsize = ns, ewidth = ew, asize = 0.3, ecurv = 0.4, mylay = lonlay, bpdf = T)
# Plot 3D LON with indicated parameter values. Use same layout and edge width
zcoord <- log(V(LON)$fitness + 1) # Use log to accentuate differences in fitness
plotNet3D(N = LON, z = zcoord, ewidth = ew, asize = 1, mylay = lonlay)
# Demo of animating a 3D image using LON Model as an example. Rotation applies to current active window.
play3d(spin3d(axis = c(1, 0, 0), rpm = 10), duration = 4) # rotation x axis to "vertical" position
play3d(spin3d(axis = c(0, 0, 1), rpm = 8), duration = 8) # spin z axis
# MLON MODEL
mlonlay <- layout_nicely(MLON)
ew <- edgeWidth(MLON, 0.3, 2)
ns <- nodeSizeStrength(MLON, 5, 18) # set size of nodes proportional to incoming strength
# Plot 2D LON with indicated parameter values
plotNet(N = MLON, tit = "mlon", nsize = ns, ewidth = ew, asize = 0.1, ecurv = 0.4, mylay = mlonlay, bpdf = F)
plotNet(N = MLON, tit = "mlon", nsize = ns, ewidth = ew, asize = 0.3, ecurv = 0.4, mylay = mlonlay, bpdf = T)
# Plot 3D LON with indicated parameter values. Use same layout and edge width
zcoord <- log(V(MLON)$fitness + 1) # Use log to accentuate differences in fitness
plotNet3D(N = MLON, z = zcoord, ewidth = ew, asize = 2, mylay = mlonlay)
# CMLON MODEL
cmlonlay <- layout_nicely(CMLON)
ew <- edgeWidth(CMLON, 1, 4)
ns <- V(CMLON)$size + 1 # set size as orginal in the CMLON model (size of plateaus)
# Plot 2D LON with indicated parameter values
plotNet(N = CMLON, tit = "cmlon", nsize = ns, ewidth = ew, asize = 0.2, ecurv=0.4, mylay = cmlonlay, bpdf = F)
plotNet(N = CMLON, tit = "cmlon", nsize = ns, ewidth = ew, asize = 0.4, ecurv=0.4, mylay = cmlonlay, bpdf = T)
# Plot 3D LON with indicated parameter values. Use same layouta and edge width
zcoord <-log(V(CMLON)$fitness + 1) # Use log to accentuate differences in fitness
plotNet3D(N = CMLON, z = zcoord, ewidth = ew, asize = 2.5, mylay = cmlonlay)