Source code for pygsp.graphs.nngraphs.nngraph

# -*- coding: utf-8 -*-

import traceback

import numpy as np
from scipy import sparse, spatial

from pygsp import utils
from pygsp.graphs import Graph  # prevent circular import in Python < 3.5

_logger = utils.build_logger(__name__)

try:
    import pyflann as pfl
    _PFL_IMPORT = True
except Exception:
    _logger.warning('Cannot import pyflann (used for faster kNN computations):'
                    ' {}'.format(traceback.format_exc()))
    _PFL_IMPORT = False


[docs]class NNGraph(Graph): r"""Nearest-neighbor graph from given point cloud. Parameters ---------- Xin : ndarray Input points, Should be an `N`-by-`d` matrix, where `N` is the number of nodes in the graph and `d` is the dimension of the feature space. NNtype : string, optional Type of nearest neighbor graph to create. The options are 'knn' for k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default is 'knn'). use_flann : bool, optional Use Fast Library for Approximate Nearest Neighbors (FLANN) or not. (default is False) center : bool, optional Center the data so that it has zero mean (default is True) rescale : bool, optional Rescale the data so that it lies in a l2-sphere (default is True) k : int, optional Number of neighbors for knn (default is 10) sigma : float, optional Width parameter of the similarity kernel (default is 0.1) epsilon : float, optional Radius for the epsilon-neighborhood search (default is 0.01) gtype : string, optional The type of graph (default is 'nearest neighbors') plotting : dict, optional Dictionary of plotting parameters. See :obj:`pygsp.plotting`. (default is {}) symmetrize_type : string, optional Type of symmetrization to use for the adjacency matrix. See :func:`pygsp.utils.symmetrization` for the options. (default is 'average') dist_type : string, optional Type of distance to compute. See :func:`pyflann.index.set_distance_type` for possible options. (default is 'euclidean') order : float, optional Only used if dist_type is 'minkowski'; represents the order of the Minkowski distance. (default is 0) Examples -------- >>> import matplotlib.pyplot as plt >>> X = np.random.RandomState(42).uniform(size=(30, 2)) >>> G = graphs.NNGraph(X) >>> fig, axes = plt.subplots(1, 2) >>> _ = axes[0].spy(G.W, markersize=5) >>> G.plot(ax=axes[1]) """ def __init__(self, Xin, NNtype='knn', use_flann=False, center=True, rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None, plotting={}, symmetrize_type='average', dist_type='euclidean', order=0, **kwargs): self.Xin = Xin self.NNtype = NNtype self.use_flann = use_flann self.center = center self.rescale = rescale self.k = k self.sigma = sigma self.epsilon = epsilon if gtype is None: gtype = 'nearest neighbors' else: gtype = '{}, NNGraph'.format(gtype) self.symmetrize_type = symmetrize_type N, d = np.shape(self.Xin) Xout = self.Xin if self.center: Xout = self.Xin - np.kron(np.ones((N, 1)), np.mean(self.Xin, axis=0)) if self.rescale: bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) - np.amin(Xout, axis=0), 2) scale = np.power(N, 1. / float(min(d, 3))) / 10. Xout *= scale / bounding_radius # Translate distance type string to corresponding Minkowski order. dist_translation = {"euclidean": 2, "manhattan": 1, "max_dist": np.inf, "minkowski": order } if self.NNtype == 'knn': spi = np.zeros((N * k)) spj = np.zeros((N * k)) spv = np.zeros((N * k)) if self.use_flann and _PFL_IMPORT: pfl.set_distance_type(dist_type, order=order) flann = pfl.FLANN() # Default FLANN parameters (I tried changing the algorithm and # testing performance on huge matrices, but the default one # seems to work best). NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1), algorithm='kdtree') else: kdt = spatial.KDTree(Xout) D, NN = kdt.query(Xout, k=(k + 1), p=dist_translation[dist_type]) for i in range(N): spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i) spj[i * k:(i + 1) * k] = NN[i, 1:] spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) / float(self.sigma)) elif self.NNtype == 'radius': kdt = spatial.KDTree(Xout) D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon, p=dist_translation[dist_type]) count = 0 for i in range(N): count = count + len(NN[i]) spi = np.zeros((count)) spj = np.zeros((count)) spv = np.zeros((count)) start = 0 for i in range(N): leng = len(NN[i]) - 1 spi[start:start + leng] = np.kron(np.ones((leng)), i) spj[start:start + leng] = NN[i][1:] spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) / float(self.sigma)) start = start + leng else: raise ValueError('Unknown NNtype {}'.format(self.NNtype)) W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N)) # Sanity check if np.shape(W)[0] != np.shape(W)[1]: raise ValueError('Weight matrix W is not square') # Enforce symmetry. Note that checking symmetry with # np.abs(W - W.T).sum() is as costly as the symmetrization itself. W = utils.symmetrize(W, method=symmetrize_type) super(NNGraph, self).__init__(W=W, gtype=gtype, plotting=plotting, coords=Xout, **kwargs)