Source code for pygsp.graphs.community

# -*- coding: utf-8 -*-

from __future__ import division

import collections
import copy

import numpy as np
from scipy import sparse, spatial

from pygsp import utils
from . import Graph  # prevent circular import in Python < 3.5


[docs]class Community(Graph): r"""Community graph. Parameters ---------- N : int Number of nodes (default = 256). Nc : int (optional) Number of communities (default = :math:`\lfloor \sqrt{N}/2 \rceil`). min_comm : int (optional) Minimum size of the communities (default = :math:`\lfloor N/Nc/3 \rceil`). min_deg : int (optional) NOT IMPLEMENTED. Minimum degree of each node (default = 0). comm_sizes : int (optional) Size of the communities (default = random). size_ratio : float (optional) Ratio between the radius of world and the radius of communities (default = 1). world_density : float (optional) Probability of a random edge between two different communities (default = 1/N). comm_density : float (optional) Probability of a random edge inside any community (default = None, which implies k_neigh or epsilon will be used to determine intra-edges). k_neigh : int (optional) Number of intra-community connections. Not used if comm_density is defined (default = None, which implies comm_density or epsilon will be used to determine intra-edges). epsilon : float (optional) Largest distance at which two nodes sharing a community are connected. Not used if k_neigh or comm_density is defined (default = :math:`\sqrt{2\sqrt{N}}/2`). seed : int Seed for the random number generator (for reproducible graphs). Examples -------- >>> import matplotlib.pyplot as plt >>> G = graphs.Community(N=250, Nc=3, comm_sizes=[50, 120, 80], seed=42) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=0.5) >>> G.plot(ax=axes[1]) """ def __init__(self, N=256, Nc=None, min_comm=None, min_deg=0, comm_sizes=None, size_ratio=1, world_density=None, comm_density=None, k_neigh=None, epsilon=None, seed=None, **kwargs): if Nc is None: Nc = int(round(np.sqrt(N) / 2)) if min_comm is None: min_comm = int(round(N / (3 * Nc))) if world_density is None: world_density = 1 / N if not 0 <= world_density <= 1: raise ValueError('World density should be in [0, 1].') if epsilon is None: epsilon = np.sqrt(2 * np.sqrt(N)) / 2 rs = np.random.RandomState(seed) self.logger = utils.build_logger(__name__, **kwargs) w_data = [[], [[], []]] if min_comm * Nc > N: raise ValueError('The constraint on minimum size for communities is unsolvable.') info = {'node_com': None, 'comm_sizes': None, 'world_rad': None, 'world_density': world_density, 'min_comm': min_comm} # Communities construction # if comm_sizes is None: mandatory_labels = np.tile(np.arange(Nc), (min_comm,)) # min_comm labels for each of the Nc communities remaining_labels = rs.choice(Nc, N - min_comm * Nc) # random choice for the remaining labels info['node_com'] = np.sort(np.concatenate((mandatory_labels, remaining_labels))) else: if len(comm_sizes) != Nc: raise ValueError('There should be Nc community sizes.') if np.sum(comm_sizes) != N: raise ValueError('The sum of community sizes should be N.') # create labels based on the constraint given for the community sizes. No random assignation here. info['node_com'] = np.concatenate([[val] * cnt for (val, cnt) in enumerate(comm_sizes)]) counts = collections.Counter(info['node_com']) info['comm_sizes'] = np.array([cnt[1] for cnt in sorted(counts.items())]) info['world_rad'] = size_ratio * np.sqrt(N) # Intra-community edges construction # if comm_density is not None: # random picking edges following the community density (same for all communities) comm_density = float(comm_density) comm_density = comm_density if 0. <= comm_density <= 1. else 0.1 info['comm_density'] = comm_density self.logger.info('Constructed using community density = {}'.format(comm_density)) elif k_neigh is not None: # k-NN among the nodes in the same community (same k for all communities) if k_neigh < 0: raise ValueError('k_neigh cannot be negative.') info['k_neigh'] = k_neigh self.logger.info('Constructed using K-NN with k = {}'.format(k_neigh)) else: # epsilon-NN among the nodes in the same community (same eps for all communities) info['epsilon'] = epsilon self.logger.info('Constructed using eps-NN with eps = {}'.format(epsilon)) # Coordinates # info['com_coords'] = info['world_rad'] * np.array(list(zip( np.cos(2 * np.pi * np.arange(1, Nc + 1) / Nc), np.sin(2 * np.pi * np.arange(1, Nc + 1) / Nc)))) coords = rs.rand(N, 2) # nodes' coordinates inside the community coords = np.array([[elem[0] * np.cos(2 * np.pi * elem[1]), elem[0] * np.sin(2 * np.pi * elem[1])] for elem in coords]) for i in range(N): # set coordinates as an offset from the center of the community it belongs to comm_idx = info['node_com'][i] comm_rad = np.sqrt(info['comm_sizes'][comm_idx]) coords[i] = info['com_coords'][comm_idx] + comm_rad * coords[i] first_node = 0 for i in range(Nc): com_siz = info['comm_sizes'][i] M = com_siz * (com_siz - 1) / 2 if comm_density is not None: nb_edges = int(comm_density * M) tril_ind = np.tril_indices(com_siz, -1) indices = rs.permutation(int(M))[:nb_edges] w_data[0] += [1] * nb_edges w_data[1][0] += [first_node + tril_ind[1][elem] for elem in indices] w_data[1][1] += [first_node + tril_ind[0][elem] for elem in indices] elif k_neigh is not None: comm_coords = coords[first_node:first_node + com_siz] kdtree = spatial.KDTree(comm_coords) __, indices = kdtree.query(comm_coords, k=k_neigh + 1) pairs_set = set() map(lambda row: map(lambda elm: pairs_set.add((min(row[0], elm), max(row[0], elm))), row[1:]), indices) w_data[0] += [1] * len(pairs_set) w_data[1][0] += [first_node + pair[0] for pair in pairs_set] w_data[1][1] += [first_node + pair[1] for pair in pairs_set] else: comm_coords = coords[first_node:first_node + com_siz] kdtree = spatial.KDTree(comm_coords) pairs_set = kdtree.query_pairs(epsilon) w_data[0] += [1] * len(pairs_set) w_data[1][0] += [first_node + elem[0] for elem in pairs_set] w_data[1][1] += [first_node + elem[1] for elem in pairs_set] first_node += com_siz # Inter-community edges construction # M = (N**2 - np.sum([com_siz**2 for com_siz in info['comm_sizes']])) / 2 nb_edges = int(world_density * M) if world_density < 0.35: # use regression sampling inter_edges = set() while len(inter_edges) < nb_edges: new_point = rs.randint(0, N, 2) if info['node_com'][min(new_point)] != info['node_com'][max(new_point)]: inter_edges.add((min(new_point), max(new_point))) else: # use random permutation indices = rs.permutation(int(M))[:nb_edges] all_points, first_col = [], 0 for i in range(Nc - 1): nb_col = info['comm_sizes'][i] first_row = np.sum(info['comm_sizes'][:i+1]) for j in range(i+1, Nc): nb_row = info['comm_sizes'][j] all_points += [(first_row + r, first_col + c) for r in range(nb_row) for c in range(nb_col)] first_row += nb_row first_col += nb_col inter_edges = np.array(all_points)[indices] w_data[0] += [1] * nb_edges w_data[1][0] += [elem[0] for elem in inter_edges] w_data[1][1] += [elem[1] for elem in inter_edges] w_data[0] += w_data[0] tmp_w_data = copy.deepcopy(w_data[1][0]) w_data[1][0] += w_data[1][1] w_data[1][1] += tmp_w_data w_data[1] = tuple(w_data[1]) W = sparse.coo_matrix(tuple(w_data), shape=(N, N)) for key, value in {'Nc': Nc, 'info': info}.items(): setattr(self, key, value) super(Community, self).__init__(W=W, gtype='Community', coords=coords, **kwargs)