Source code for pygsp.graphs.community
# -*- coding: utf-8 -*-
from . import Graph
from pygsp.utils import build_logger
from collections import Counter
from copy import deepcopy
import numpy as np
from scipy import sparse, spatial
[docs]class Community(Graph):
r"""
Create a community graph.
Parameters
----------
N : int
Number of nodes (default = 256)
kwargs : Dict
Optional parameters for the construction of the Community graph
Nc : int
Number of communities (default = :math:`round(\sqrt{N}/2)`)
min_comm : int
Minimum size of the communities (default = round(N/Nc/3))
min_deg : int
Minimum degree of each node (default = 0, NOT IMPLEMENTED YET)
comm_sizes : int
Size of the communities (default = random)
size_ratio : float
Ratio between the radius of world and the radius of communities (default = 1)
world_density : float
Probability of a random edge between two different communities (default = 1/N)
comm_density : float
Probability of a random edge inside any community (default = None, not used if None)
k_neigh : int
Number of intra-community connections (default = None, not used if None or comm_density is defined)
epsilon : float
Max distance at which two nodes sharing a community are connected
(default = :math:`sqrt(2\sqrt{N})/2`, not used if k_neigh or comm_density is defined)
Examples
--------
>>> from pygsp import graphs
>>> G = graphs.Community()
"""
def __init__(self, N=256, **kwargs):
# Parameter initialisation #
N = int(N)
Nc = int(kwargs.pop('Nc', int(round(np.sqrt(N)/2.))))
min_comm = int(kwargs.pop('min_comm', int(round(N / (3. * Nc)))))
min_deg = int(kwargs.pop('min_deg', 0))
comm_sizes = kwargs.pop('comm_sizes', np.array([]))
size_ratio = float(kwargs.pop('size_ratio', 1.))
world_density = float(kwargs.pop('world_density', 1. / N))
world_density = world_density if 0 <= world_density <= 1 else 1. / N
comm_density = kwargs.pop('comm_density', None)
k_neigh = kwargs.pop('k_neigh', None)
epsilon = float(kwargs.pop('epsilon', np.sqrt(2 * np.sqrt(N)) / 2))
self.logger = build_logger(__name__, **kwargs)
w_data = [[], [[], []]]
try:
if len(comm_sizes) > 0:
if np.sum(comm_sizes) != N:
raise ValueError('GSP_COMMUNITY: The sum of the community sizes has to be equal to N.')
if len(comm_sizes) != Nc:
raise ValueError('GSP_COMMUNITY: The length of the community sizes has to be equal to Nc.')
except TypeError:
raise TypeError("GSP_COMMUNITY: comm_sizes expected to be a list or array, got {}".format(type(comm_sizes)))
if min_comm * Nc > N:
raise ValueError('GSP_COMMUNITY: The constraint on minimum size for communities is unsolvable.')
info = {'node_com': None, 'comm_sizes': None, 'world_rad': None,
'world_density': world_density, 'min_comm': min_comm}
# Communities construction #
if comm_sizes.shape[0] == 0:
mandatory_labels = np.tile(np.arange(Nc), (min_comm,)) # min_comm labels for each of the Nc communities
remaining_labels = np.random.choice(Nc, N - min_comm * Nc) # random choice for the remaining labels
info['node_com'] = np.sort(np.concatenate((mandatory_labels, remaining_labels)))
else:
# create labels based on the constraint given for the community sizes. No random assignation here.
info['node_com'] = np.concatenate([[val] * cnt for (val, cnt) in enumerate(comm_sizes)])
counts = Counter(info['node_com'])
info['comm_sizes'] = np.array([cnt[1] for cnt in sorted(counts.items())])
info['world_rad'] = size_ratio * np.sqrt(N)
# Intra-community edges construction #
if comm_density:
# random picking edges following the community density (same for all communities)
comm_density = float(comm_density)
comm_density = comm_density if 0. <= comm_density <= 1. else 0.1
info['comm_density'] = comm_density
self.logger.info("GSP_COMMUNITY: Constructed using community density = {}".format(comm_density))
elif k_neigh:
# k-NN among the nodes in the same community (same k for all communities)
k_neigh = int(k_neigh)
k_neigh = k_neigh if k_neigh > 0 else 10
info['k_neigh'] = k_neigh
self.logger.info("GSP_COMMUNITY: Constructed using K-NN with k = {}".format(k_neigh))
else:
# epsilon-NN among the nodes in the same community (same eps for all communities)
info['epsilon'] = epsilon
self.logger.info("GSP_COMMUNITY: Constructed using eps-NN with eps = {}".format(epsilon))
first_node = 0
for i in range(Nc):
com_siz = info['comm_sizes'][i]
M = com_siz * (com_siz - 1) / 2
if comm_density:
nb_edges = int(comm_density * M)
tril_ind = np.tril_indices(com_siz, -1)
indices = np.random.permutation(M)[:nb_edges]
w_data[0] += [1] * nb_edges
w_data[1][0] += [first_node + tril_ind[1][elem] for elem in indices]
w_data[1][1] += [first_node + tril_ind[0][elem] for elem in indices]
elif k_neigh:
comm_coords = coords[first_node:first_node + com_siz]
kdtree = spatial.KDTree(comm_coords)
__, indices = kdtree.query(comm_coords, k=k_neigh + 1)
pairs_set = set()
map(lambda row: map(lambda elm: pairs_set.add((min(row[0], elm), max(row[0], elm))), row[1:]), indices)
w_data[0] += [1] * len(pairs_set)
w_data[1][0] += [first_node + pair[0] for pair in pairs_set]
w_data[1][1] += [first_node + pair[1] for pair in pairs_set]
else:
comm_coords = coords[first_node:first_node + com_siz]
kdtree = spatial.KDTree(comm_coords)
pairs_set = kdtree.query_pairs(epsilon)
w_data[0] += [1] * len(pairs_set)
w_data[1][0] += [first_node + elem[0] for elem in pairs_set]
w_data[1][1] += [first_node + elem[1] for elem in pairs_set]
first_node += com_siz
# Inter-community edges construction #
M = (N**2 - np.sum([com_siz**2 for com_siz in info['comm_sizes']])) / 2
nb_edges = int(world_density * M)
if world_density < 0.35:
# use regression sampling
inter_edges = set()
while len(inter_edges) < nb_edges:
new_point = np.random.randint(0, N, 2)
if info['node_com'][min(new_point)] != info['node_com'][max(new_point)]:
inter_edges.add((min(new_point), max(new_point)))
else:
# use random permutation
indices = np.random.permutation(M)[:nb_edges]
all_points, first_col = [], 0
for i in range(Nc - 1):
nb_col = info['comm_sizes'][i]
first_row = np.sum(info['comm_sizes'][:i+1])
for j in range(i+1, Nc):
nb_row = info['comm_sizes'][j]
all_points += [(first_row + r, first_col + c) for r in range(nb_row) for c in range(nb_col)]
first_row += nb_row
first_col += nb_col
inter_edges = np.array(all_points)[indices]
w_data[0] += [1] * nb_edges
w_data[1][0] += [elem[0] for elem in inter_edges]
w_data[1][1] += [elem[1] for elem in inter_edges]
w_data[0] += w_data[0]
tmp_w_data = deepcopy(w_data[1][0])
w_data[1][0] += w_data[1][1]
w_data[1][1] += tmp_w_data
w_data[1] = tuple(w_data[1])
W = sparse.coo_matrix(tuple(w_data), shape=(N, N))
for key, value in {'Nc': Nc, 'info': info}:
setattr(self, key, value)
super(Community, self).__init__(W=W, gtype='Community', **kwargs)