Source code for pygsp.graphs.nngraphs.nngraph

# -*- coding: utf-8 -*-

import traceback

import numpy as np
from scipy import sparse, spatial

from pygsp import utils
from pygsp.graphs import Graph  # prevent circular import in Python < 3.5

_logger = utils.build_logger(__name__)

try:
    import pyflann as pfl
    _PFL_IMPORT = True
except Exception:
    _logger.warning('Cannot import pyflann (used for faster kNN computations):'
                    ' {}'.format(traceback.format_exc()))
    _PFL_IMPORT = False


[docs]class NNGraph(Graph):
    r"""Nearest-neighbor graph from given point cloud.

    Parameters
    ----------
    Xin : ndarray
        Input points, Should be an `N`-by-`d` matrix, where `N` is the number
        of nodes in the graph and `d` is the dimension of the feature space.
    NNtype : string, optional
        Type of nearest neighbor graph to create. The options are 'knn' for
        k-Nearest Neighbors or 'radius' for epsilon-Nearest Neighbors (default
        is 'knn').
    use_flann : bool, optional
        Use Fast Library for Approximate Nearest Neighbors (FLANN) or not.
        (default is False)
    center : bool, optional
        Center the data so that it has zero mean (default is True)
    rescale : bool, optional
        Rescale the data so that it lies in a l2-sphere (default is True)
    k : int, optional
        Number of neighbors for knn (default is 10)
    sigma : float, optional
        Width parameter of the similarity kernel (default is 0.1)
    epsilon : float, optional
        Radius for the epsilon-neighborhood search (default is 0.01)
    gtype : string, optional
        The type of graph (default is 'nearest neighbors')
    plotting : dict, optional
        Dictionary of plotting parameters. See :obj:`pygsp.plotting`.
        (default is {})
    symmetrize_type : string, optional
        Type of symmetrization to use for the adjacency matrix. See
        :func:`pygsp.utils.symmetrization` for the options.
        (default is 'average')
    dist_type : string, optional
        Type of distance to compute. See
        :func:`pyflann.index.set_distance_type` for possible options.
        (default is 'euclidean')
    order : float, optional
        Only used if dist_type is 'minkowski'; represents the order of the
        Minkowski distance. (default is 0)

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> X = np.random.RandomState(42).uniform(size=(30, 2))
    >>> G = graphs.NNGraph(X)
    >>> fig, axes = plt.subplots(1, 2)
    >>> _ = axes[0].spy(G.W, markersize=5)
    >>> G.plot(ax=axes[1])

    """

    def __init__(self, Xin, NNtype='knn', use_flann=False, center=True,
                 rescale=True, k=10, sigma=0.1, epsilon=0.01, gtype=None,
                 plotting={}, symmetrize_type='average', dist_type='euclidean',
                 order=0, **kwargs):

        self.Xin = Xin
        self.NNtype = NNtype
        self.use_flann = use_flann
        self.center = center
        self.rescale = rescale
        self.k = k
        self.sigma = sigma
        self.epsilon = epsilon

        if gtype is None:
            gtype = 'nearest neighbors'
        else:
            gtype = '{}, NNGraph'.format(gtype)

        self.symmetrize_type = symmetrize_type

        N, d = np.shape(self.Xin)
        Xout = self.Xin

        if self.center:
            Xout = self.Xin - np.kron(np.ones((N, 1)),
                                      np.mean(self.Xin, axis=0))

        if self.rescale:
            bounding_radius = 0.5 * np.linalg.norm(np.amax(Xout, axis=0) -
                                                   np.amin(Xout, axis=0), 2)
            scale = np.power(N, 1. / float(min(d, 3))) / 10.
            Xout *= scale / bounding_radius

        # Translate distance type string to corresponding Minkowski order.
        dist_translation = {"euclidean": 2,
                            "manhattan": 1,
                            "max_dist": np.inf,
                            "minkowski": order
                            }

        if self.NNtype == 'knn':
            spi = np.zeros((N * k))
            spj = np.zeros((N * k))
            spv = np.zeros((N * k))

            if self.use_flann and _PFL_IMPORT:
                pfl.set_distance_type(dist_type, order=order)
                flann = pfl.FLANN()

                # Default FLANN parameters (I tried changing the algorithm and
                # testing performance on huge matrices, but the default one
                # seems to work best).
                NN, D = flann.nn(Xout, Xout, num_neighbors=(k + 1),
                                 algorithm='kdtree')

            else:
                kdt = spatial.KDTree(Xout)
                D, NN = kdt.query(Xout, k=(k + 1),
                                  p=dist_translation[dist_type])

            for i in range(N):
                spi[i * k:(i + 1) * k] = np.kron(np.ones((k)), i)
                spj[i * k:(i + 1) * k] = NN[i, 1:]
                spv[i * k:(i + 1) * k] = np.exp(-np.power(D[i, 1:], 2) /
                                                float(self.sigma))

        elif self.NNtype == 'radius':

            kdt = spatial.KDTree(Xout)
            D, NN = kdt.query(Xout, k=None, distance_upper_bound=epsilon,
                              p=dist_translation[dist_type])
            count = 0
            for i in range(N):
                count = count + len(NN[i])

            spi = np.zeros((count))
            spj = np.zeros((count))
            spv = np.zeros((count))

            start = 0
            for i in range(N):
                leng = len(NN[i]) - 1
                spi[start:start + leng] = np.kron(np.ones((leng)), i)
                spj[start:start + leng] = NN[i][1:]
                spv[start:start + leng] = np.exp(-np.power(D[i][1:], 2) /
                                                 float(self.sigma))
                start = start + leng

        else:
            raise ValueError('Unknown NNtype {}'.format(self.NNtype))

        W = sparse.csc_matrix((spv, (spi, spj)), shape=(N, N))

        # Sanity check
        if np.shape(W)[0] != np.shape(W)[1]:
            raise ValueError('Weight matrix W is not square')

        # Enforce symmetry. Note that checking symmetry with
        # np.abs(W - W.T).sum() is as costly as the symmetrization itself.
        W = utils.symmetrize(W, method=symmetrize_type)

        super(NNGraph, self).__init__(W=W, gtype=gtype, plotting=plotting,
                                      coords=Xout, **kwargs)