# Source code for pygsp.graphs.community
# -*- coding: utf-8 -*-
from __future__ import division
import collections
import copy
import numpy as np
from scipy import sparse, spatial
from pygsp import utils
from . import Graph # prevent circular import in Python < 3.5
class Community(Graph):
    r"""Community graph.

    Parameters
    ----------
    N : int
        Number of nodes (default = 256).
    Nc : int (optional)
        Number of communities (default = :math:`\lfloor \sqrt{N}/2 \rceil`).
    min_comm : int (optional)
        Minimum size of the communities
        (default = :math:`\lfloor N/Nc/3 \rceil`).
    min_deg : int (optional)
        NOT IMPLEMENTED. Minimum degree of each node (default = 0).
    comm_sizes : int (optional)
        Size of the communities (default = random).
    size_ratio : float (optional)
        Ratio between the radius of world and the radius of communities
        (default = 1).
    world_density : float (optional)
        Probability of a random edge between two different communities
        (default = 1/N).
    comm_density : float (optional)
        Probability of a random edge inside any community (default = None,
        which implies k_neigh or epsilon will be used to determine
        intra-edges).
    k_neigh : int (optional)
        Number of intra-community connections.
        Not used if comm_density is defined (default = None, which implies
        comm_density or epsilon will be used to determine intra-edges).
    epsilon : float (optional)
        Largest distance at which two nodes sharing a community are connected.
        Not used if k_neigh or comm_density is defined
        (default = :math:`\sqrt{2\sqrt{N}}/2`).
    seed : int
        Seed for the random number generator (for reproducible graphs).

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> G = graphs.Community(N=250, Nc=3, comm_sizes=[50, 120, 80], seed=42)
    >>> fig, axes = plt.subplots(1, 2)
    >>> _ = axes[0].spy(G.W, markersize=0.5)
    >>> G.plot(ax=axes[1])

    """

    def __init__(self,
                 N=256,
                 Nc=None,
                 min_comm=None,
                 min_deg=0,
                 comm_sizes=None,
                 size_ratio=1,
                 world_density=None,
                 comm_density=None,
                 k_neigh=None,
                 epsilon=None,
                 seed=None,
                 **kwargs):

        # Fill in the data-dependent defaults documented above.
        if Nc is None:
            Nc = int(round(np.sqrt(N) / 2))
        if min_comm is None:
            min_comm = int(round(N / (3 * Nc)))
        if world_density is None:
            world_density = 1 / N
        if not 0 <= world_density <= 1:
            raise ValueError('World density should be in [0, 1].')
        if epsilon is None:
            epsilon = np.sqrt(2 * np.sqrt(N)) / 2

        rs = np.random.RandomState(seed)

        self.logger = utils.build_logger(__name__, **kwargs)

        # COO-style edge accumulator: w_data[0] collects the weights,
        # w_data[1] the [row, col] index lists.  Symmetrized at the end.
        w_data = [[], [[], []]]

        if min_comm * Nc > N:
            raise ValueError('The constraint on minimum size for communities is unsolvable.')

        info = {'node_com': None, 'comm_sizes': None, 'world_rad': None,
                'world_density': world_density, 'min_comm': min_comm}

        # Communities construction #
        if comm_sizes is None:
            # Guarantee min_comm labels per community, then distribute the
            # remaining node labels uniformly at random.
            mandatory_labels = np.tile(np.arange(Nc), (min_comm,))
            remaining_labels = rs.choice(Nc, N - min_comm * Nc)
            info['node_com'] = np.sort(np.concatenate((mandatory_labels, remaining_labels)))
        else:
            if len(comm_sizes) != Nc:
                raise ValueError('There should be Nc community sizes.')
            if np.sum(comm_sizes) != N:
                raise ValueError('The sum of community sizes should be N.')
            # Deterministic assignment from the given sizes, no randomness.
            info['node_com'] = np.concatenate([[val] * cnt for (val, cnt) in enumerate(comm_sizes)])

        counts = collections.Counter(info['node_com'])
        info['comm_sizes'] = np.array([cnt[1] for cnt in sorted(counts.items())])
        info['world_rad'] = size_ratio * np.sqrt(N)

        # Intra-community edges construction #
        # Exactly one of three strategies is used, in this priority order:
        # comm_density > k_neigh > epsilon.
        if comm_density is not None:
            # Random edges following the community density (same for all
            # communities).  NOTE: an out-of-range density is silently
            # replaced by 0.1 (kept for backward compatibility).
            comm_density = float(comm_density)
            comm_density = comm_density if 0. <= comm_density <= 1. else 0.1
            info['comm_density'] = comm_density
            self.logger.info('Constructed using community density = {}'.format(comm_density))
        elif k_neigh is not None:
            # k-NN among the nodes in the same community (same k for all).
            if k_neigh < 0:
                raise ValueError('k_neigh cannot be negative.')
            info['k_neigh'] = k_neigh
            self.logger.info('Constructed using K-NN with k = {}'.format(k_neigh))
        else:
            # epsilon-NN among the nodes in the same community.
            info['epsilon'] = epsilon
            self.logger.info('Constructed using eps-NN with eps = {}'.format(epsilon))

        # Coordinates #
        # Community centers evenly spaced on a circle of radius world_rad.
        info['com_coords'] = info['world_rad'] * np.array(list(zip(
            np.cos(2 * np.pi * np.arange(1, Nc + 1) / Nc),
            np.sin(2 * np.pi * np.arange(1, Nc + 1) / Nc))))

        # Nodes drawn in polar form (radius, angle) then mapped to Cartesian.
        coords = rs.rand(N, 2)
        coords = np.array([[elem[0] * np.cos(2 * np.pi * elem[1]),
                            elem[0] * np.sin(2 * np.pi * elem[1])] for elem in coords])

        for i in range(N):
            # Offset each node from the center of its community; the
            # community radius grows with the square root of its size.
            comm_idx = info['node_com'][i]
            comm_rad = np.sqrt(info['comm_sizes'][comm_idx])
            coords[i] = info['com_coords'][comm_idx] + comm_rad * coords[i]

        first_node = 0
        for i in range(Nc):
            com_siz = info['comm_sizes'][i]
            M = com_siz * (com_siz - 1) / 2  # number of distinct node pairs

            if comm_density is not None:
                # Sample nb_edges distinct pairs from the strict lower triangle.
                nb_edges = int(comm_density * M)
                tril_ind = np.tril_indices(com_siz, -1)
                indices = rs.permutation(int(M))[:nb_edges]

                w_data[0] += [1] * nb_edges
                w_data[1][0] += [first_node + tril_ind[1][elem] for elem in indices]
                w_data[1][1] += [first_node + tril_ind[0][elem] for elem in indices]

            elif k_neigh is not None:
                comm_coords = coords[first_node:first_node + com_siz]
                kdtree = spatial.KDTree(comm_coords)
                __, indices = kdtree.query(comm_coords, k=k_neigh + 1)

                # Each row holds a node's k+1 nearest neighbors; row[0] is
                # assumed to be the node itself.  Store each undirected pair
                # once as (smaller, larger).
                # BUGFIX: this previously relied on the side effects of
                # nested map() calls, which are lazy in Python 3 and were
                # never consumed -- no intra-community edge was created.
                pairs_set = set()
                for row in indices:
                    for neighbor in row[1:]:
                        pairs_set.add((min(row[0], neighbor),
                                       max(row[0], neighbor)))

                w_data[0] += [1] * len(pairs_set)
                w_data[1][0] += [first_node + pair[0] for pair in pairs_set]
                w_data[1][1] += [first_node + pair[1] for pair in pairs_set]

            else:
                # Connect all pairs closer than epsilon.
                comm_coords = coords[first_node:first_node + com_siz]
                kdtree = spatial.KDTree(comm_coords)
                pairs_set = kdtree.query_pairs(epsilon)

                w_data[0] += [1] * len(pairs_set)
                w_data[1][0] += [first_node + elem[0] for elem in pairs_set]
                w_data[1][1] += [first_node + elem[1] for elem in pairs_set]

            first_node += com_siz

        # Inter-community edges construction #
        # M = number of node pairs that straddle two different communities.
        M = (N**2 - np.sum([com_siz**2 for com_siz in info['comm_sizes']])) / 2
        nb_edges = int(world_density * M)

        if world_density < 0.35:
            # Sparse regime: rejection sampling of random pairs is cheap.
            inter_edges = set()
            while len(inter_edges) < nb_edges:
                new_point = rs.randint(0, N, 2)
                if info['node_com'][min(new_point)] != info['node_com'][max(new_point)]:
                    inter_edges.add((min(new_point), max(new_point)))
        else:
            # Dense regime: enumerate every inter-community pair once and
            # take a random subset via permutation.
            indices = rs.permutation(int(M))[:nb_edges]

            all_points, first_col = [], 0
            for i in range(Nc - 1):
                nb_col = info['comm_sizes'][i]
                first_row = np.sum(info['comm_sizes'][:i+1])

                for j in range(i+1, Nc):
                    nb_row = info['comm_sizes'][j]
                    all_points += [(first_row + r, first_col + c)
                                   for r in range(nb_row) for c in range(nb_col)]
                    first_row += nb_row

                first_col += nb_col

            inter_edges = np.array(all_points)[indices]

        w_data[0] += [1] * nb_edges
        w_data[1][0] += [elem[0] for elem in inter_edges]
        w_data[1][1] += [elem[1] for elem in inter_edges]

        # Symmetrize: duplicate every edge with row and column swapped.
        # (A shallow copy suffices here -- the entries are plain ints.)
        w_data[0] += w_data[0]
        tmp_w_data = list(w_data[1][0])
        w_data[1][0] += w_data[1][1]
        w_data[1][1] += tmp_w_data
        w_data[1] = tuple(w_data[1])

        W = sparse.coo_matrix(tuple(w_data), shape=(N, N))

        self.Nc = Nc
        self.info = info

        super(Community, self).__init__(W=W, gtype='Community', coords=coords, **kwargs)